From e0b69e18f1105f40be63b00d478d60c5ce2e7ec2 Mon Sep 17 00:00:00 2001 From: Yorick Barbanneau Date: Tue, 19 Apr 2022 22:40:35 +0200 Subject: [PATCH] Process towns DataFrame Add area column from statistics --- create_db.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/create_db.py b/create_db.py index 08a6973..6a7e641 100755 --- a/create_db.py +++ b/create_db.py @@ -111,6 +111,7 @@ def import_statistics_csv(raw_file): """ logger.info('import town from {}'.format(raw_file)) + stats_convert= lambda x: x if len(str(x)) == 5 else f'0{x}' stats = pd.read_csv(raw_file, usecols=["CODGEO","SUPERF","P18_POP","P13_POP","P08_POP","D99_POP", "NAIS1318","NAIS0813","NAIS9908","NAIS9099","NAIS8290","DECE1318", @@ -120,7 +121,9 @@ def import_statistics_csv(raw_file): "P13_RP","P08_RP","D99_RP","D90_RP","D82_RP", "P18_RSECOCC", "P13_RSECOCC","P08_RSECOCC","D99_RSECOCC","D90_RSECOCC", "D82_RSECOCC"], - sep=';') + sep=';', + converters={'CODGEO':stats_convert} + ) return stats def get_single_date(attr): @@ -241,8 +244,7 @@ if __name__ == '__main__': ## create statistics dataframes # # We need to first iterate on statistics - if args.verbose or args.debug: - t.start() + t.start('Process_Statistics') c_stats = pd.DataFrame(columns = ['com','id_indicateur','date_debut', 'date_fin','valeur'] @@ -290,6 +292,18 @@ if __name__ == '__main__': temp['valeur'].append(value) t.stop() + + t.start('Process_town') + print(statistics[['SUPERF','CODGEO']]) + towns = pd.merge(towns, + statistics[['CODGEO', 'SUPERF']], + left_on=['COM'], + right_on=['CODGEO'], + how = 'left' + )[['COM','NCC','LIBELLE', 'DEP', 'SUPERF']] + t.stop() + logger.debug(towns) + t.get_total_time() sys.exit()