#!/usr/bin/env python
"""Load the INSEE region, department and town CSV files and print them."""
import argparse as arg
import io
import logging
import os
import sys

import numpy as np
import pandas as pd


class CustomFormatter(logging.Formatter):
    """Log formatter that wraps each record in an ANSI colour picked by level."""

    grey = "\x1b[38;20m"
    blue = "\x1b[34;20m"
    yellow = "\x1b[33;20m"
    red = "\x1b[31;20m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"

    # Kept under a private name so it is not shadowed by the ``format`` method
    # defined below (the original reused the name ``format`` for both).
    _template = "%(levelname)s: %(message)s (%(filename)s:%(lineno)d)"

    # Level number -> full format string (colour prefix + template + reset).
    FORMATS = {
        logging.DEBUG: blue + _template + reset,
        logging.INFO: grey + _template + reset,
        logging.WARNING: yellow + _template + reset,
        logging.ERROR: red + _template + reset,
        logging.CRITICAL: bold_red + _template + reset,
    }

    def format(self, record):
        """Return *record* rendered with the format string of its level.

        Levels missing from ``FORMATS`` yield ``None``, which makes
        ``logging.Formatter`` fall back to its default format.
        """
        log_fmt = self.FORMATS.get(record.levelno)
        return logging.Formatter(log_fmt).format(record)


def parse_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``source``, ``export``, ``towns``,
        ``departments``, ``states`` and ``verbose`` attributes.
    """
    # The text is a description; passed positionally it would be taken as
    # ``prog`` and end up as the program name in the usage line.
    parser = arg.ArgumentParser(description='Process csv files from INSEE')
    parser.add_argument('--source', '-s',
                        help='csv source directory', default='csv')
    parser.add_argument('--export', '-e',
                        help='processeced csv directory', default='exports')
    parser.add_argument('--towns',
                        help='town raw csv file (inside source follder)',
                        default='commune2021.csv')
    parser.add_argument('--departments',
                        help='departments raw csv file (inside source follder)',
                        default='departement2021.csv')
    parser.add_argument('--states',
                        help='states raw csv file (inside source follder)',
                        default='region2021.csv')
    parser.add_argument('--verbose', '-V',
                        help='Verbose output', action='store_true')
    return parser.parse_args(argv)


def _pad_region_code(code):
    """Return *code* unchanged if it is two characters long, else prefix '0'."""
    return code if len(str(code)) == 2 else f'0{code}'


def import_states_csv(raw_file):
    """Read the states (regions) raw file.

    Args:
        raw_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame restricted to the REG, NCC, LIBELLE and CHEFLIEU columns,
        with REG passed through the region-code padding converter.
    """
    return pd.read_csv(
        raw_file,
        usecols=["REG", "NCC", "LIBELLE", "CHEFLIEU"],
        converters={'REG': _pad_region_code},
        # Codes are identifiers, not numbers: read as text to keep leading zeros.
        dtype={'CHEFLIEU': str},
    )


def import_department_csv(raw_file):
    """Read the departments raw file.

    Args:
        raw_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame restricted to the DEP, NCC, LIBELLE, REG and CHEFLIEU
        columns, with REG passed through the region-code padding converter.
    """
    return pd.read_csv(
        raw_file,
        usecols=["DEP", "NCC", "LIBELLE", "REG", "CHEFLIEU"],
        converters={'REG': _pad_region_code},
        # Codes are identifiers, not numbers: read as text to keep leading zeros.
        dtype={'DEP': str, 'CHEFLIEU': str},
    )


def import_towns_csv(raw_file):
    """Read the towns raw file and keep only the rows whose TYPECOM is 'COM'.

    Args:
        raw_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame with the COM, NCC, LIBELLE and DEP columns (in that order)
        for the rows where TYPECOM equals 'COM'.
    """
    towns = pd.read_csv(
        raw_file,
        usecols=["COM", "TYPECOM", "NCC", "LIBELLE", "DEP"],
        # Without an explicit dtype, all-numeric codes such as "01001" are
        # inferred as integers and lose their leading zero.
        dtype={'COM': str, 'DEP': str},
    )
    return towns.loc[towns['TYPECOM'] == 'COM', ['COM', 'NCC', 'LIBELLE', 'DEP']]


def main(argv=None):
    """Load and print the states, departments and towns tables.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``.

    Returns:
        0 when the three files were loaded and printed, 1 as soon as one of
        the source files cannot be found.
    """
    args = parse_args(argv)

    logger = logging.getLogger()
    # Attach the handler before emitting anything: with no handler on the root
    # logger, the debug message below would be dropped by logging's
    # last-resort handler, which only lets WARNING and above through.
    tty_handler = logging.StreamHandler()
    tty_handler.setFormatter(CustomFormatter())
    logger.addHandler(tty_handler)
    if args.verbose:
        logger.setLevel(logging.DEBUG)
        logger.debug('Debug mode activated')

    datasets = (
        ('states', args.states, import_states_csv),
        ('departments', args.departments, import_department_csv),
        ('towns', args.towns, import_towns_csv),
    )
    for label, filename, loader in datasets:
        path = os.path.join(args.source, filename)
        if not os.path.exists(path):
            logger.critical("can't find source file for %s", label)
            # Stop here instead of letting read_csv fail on the missing file.
            return 1
        print(loader(path))
    return 0


if __name__ == '__main__':
    sys.exit(main())