140 lines
4.1 KiB
Python
Executable file
140 lines
4.1 KiB
Python
Executable file
#!/usr/bin/env python
|
|
import pandas as pd
|
|
import io
|
|
import numpy as np
|
|
import sys
|
|
import os
|
|
import logging
|
|
import argparse as arg
|
|
|
|
class CustomFormatter(logging.Formatter):
    """Formatter that wraps each record in an ANSI colour chosen by level.

    Every record is rendered as
    ``LEVEL: message (filename:lineno)``; the five standard levels are
    additionally surrounded by a colour escape sequence and a reset.
    Records logged at any other (custom) level use the same layout
    without colour instead of silently dropping the level and location.
    """

    # ANSI SGR escape sequences. The attribute names are kept for backward
    # compatibility; note that "grey" actually selects SGR colour 35 (magenta).
    grey = "\x1b[0;35m"
    blue = "\x1b[34;20m"
    yellow = "\x1b[33;20m"
    red = "\x1b[31;20m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"

    # Uncoloured layout shared by all levels. It is deliberately not named
    # ``format`` so that it is not shadowed by the ``format`` method below.
    base_format = "%(levelname)s: %(message)s (%(filename)s:%(lineno)d)"

    # Level number -> complete (coloured) format string.
    FORMATS = {
        logging.DEBUG: blue + base_format + reset,
        logging.INFO: grey + base_format + reset,
        logging.WARNING: yellow + base_format + reset,
        logging.ERROR: red + base_format + reset,
        logging.CRITICAL: bold_red + base_format + reset,
    }

    # Format string -> ready-to-use Formatter, filled lazily so that a new
    # Formatter is not constructed for every single record.
    _formatter_cache = {}

    def format(self, record):
        """Return *record* rendered with the format matching its level.

        Levels that are not listed in ``FORMATS`` fall back to the
        uncoloured ``base_format``.
        """
        log_fmt = self.FORMATS.get(record.levelno, self.base_format)
        formatter = self._formatter_cache.get(log_fmt)
        if formatter is None:
            formatter = logging.Formatter(log_fmt)
            self._formatter_cache[log_fmt] = formatter
        return formatter.format(record)
|
|
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the script.

    Reads ``sys.argv`` and returns an ``argparse.Namespace`` with:

    * ``source``      -- directory holding the raw csv files (default ``csv``)
    * ``export``      -- directory for processed csv files (default ``exports``)
    * ``towns``       -- towns file name inside ``source``
    * ``departments`` -- departments file name inside ``source``
    * ``states``      -- states file name inside ``source``
    * ``verbose`` / ``debug`` -- mutually exclusive boolean flags
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` and would replace the program
    # name in the usage line.
    parser = arg.ArgumentParser(description='Process csv files from INSEE')
    parser.add_argument('--source', '-s',
                        help='csv source directory',
                        default='csv')
    parser.add_argument('--export', '-e',
                        help='processed csv directory',
                        default='exports')
    parser.add_argument('--towns',
                        help='town raw csv file (inside source folder)',
                        default='commune2021.csv')
    parser.add_argument('--departments',
                        help='departments raw csv file (inside source folder)',
                        default='departement2021.csv')
    parser.add_argument('--states',
                        help='states raw csv file (inside source folder)',
                        default='region2021.csv')

    # --verbose and --debug cannot be combined.
    debug_group = parser.add_mutually_exclusive_group()
    debug_group.add_argument('--verbose', '-V',
                             help='Verbose output',
                             action='store_true')
    debug_group.add_argument('--debug', '-d',
                             help='Activate debug mode',
                             action='store_true')
    return parser.parse_args()
|
|
|
|
|
|
def import_states_csv(raw_file):
    """Load the raw states csv file.

    Only the ``REG``, ``NCC``, ``LIBELLE`` and ``CHEFLIEU`` columns are read.
    ``REG`` goes through a converter that prefixes a ``0`` to any value that
    is not exactly two characters long, and ``CHEFLIEU`` is read as text so
    that codes keep their leading zeros instead of being parsed as numbers.

    :param raw_file: path (or file-like object) accepted by ``pd.read_csv``
    :return: ``pandas.DataFrame`` with the selected columns
    """
    # Use the root logger directly (the same logger the script configures)
    # so the function also works when the module is imported rather than run.
    logging.getLogger().info('import states from %s', raw_file)

    def pad_region(code):
        # Converters receive the raw cell text.
        return code if len(str(code)) == 2 else f'0{code}'

    return pd.read_csv(
        raw_file,
        usecols=["REG", "NCC", "LIBELLE", "CHEFLIEU"],
        converters={'REG': pad_region},
        dtype={'CHEFLIEU': str},
    )
|
|
|
|
|
|
def import_department_csv(raw_file):
    """Load the raw departments csv file.

    Only the ``DEP``, ``NCC``, ``LIBELLE``, ``REG`` and ``CHEFLIEU`` columns
    are read. ``REG`` goes through a converter that prefixes a ``0`` to any
    value that is not exactly two characters long. ``DEP`` and ``CHEFLIEU``
    are read as text so that codes such as ``01`` are not turned into the
    number ``1``.

    :param raw_file: path (or file-like object) accepted by ``pd.read_csv``
    :return: ``pandas.DataFrame`` with the selected columns
    """
    # Use the root logger directly (the same logger the script configures)
    # so the function also works when the module is imported rather than run.
    logging.getLogger().info('import departments from %s', raw_file)

    def pad_region(code):
        # Converters receive the raw cell text.
        return code if len(str(code)) == 2 else f'0{code}'

    return pd.read_csv(
        raw_file,
        usecols=["DEP", "NCC", "LIBELLE", "REG", "CHEFLIEU"],
        converters={'REG': pad_region},
        dtype={'DEP': str, 'CHEFLIEU': str},
    )
|
|
|
|
|
|
def import_towns_csv(raw_file):
    """Load the raw towns csv file and keep only plain towns.

    The ``COM``, ``TYPECOM``, ``NCC``, ``LIBELLE`` and ``DEP`` columns are
    read; rows whose ``TYPECOM`` is not ``COM`` are dropped and the
    ``TYPECOM`` column itself is not returned. ``COM`` and ``DEP`` are read
    as text so that codes keep their leading zeros and the column does not
    end up with a mix of numbers and strings.

    :param raw_file: path (or file-like object) accepted by ``pd.read_csv``
    :return: ``pandas.DataFrame`` with columns ``COM``, ``NCC``, ``LIBELLE``
        and ``DEP``
    """
    # Use the root logger directly (the same logger the script configures)
    # so the function also works when the module is imported rather than run.
    logging.getLogger().info('import town from %s', raw_file)

    towns = pd.read_csv(
        raw_file,
        usecols=["COM", "TYPECOM", "NCC", "LIBELLE", "DEP"],
        dtype={'COM': str, 'DEP': str},
    )
    is_town = towns['TYPECOM'] == 'COM'
    return towns.loc[is_town, ['COM', 'NCC', 'LIBELLE', 'DEP']]
|
|
|
|
|
|
if __name__ == '__main__':
    args = parse_args()

    # Configure the root logger with a colourised console handler.
    logger = logging.getLogger()
    tty_handler = logging.StreamHandler()
    tty_handler.setFormatter(CustomFormatter())
    logger.addHandler(tty_handler)

    # --verbose and --debug are mutually exclusive (enforced by parse_args).
    if args.verbose:
        logger.setLevel(logging.INFO)
        logger.info('VERBOSE mode activated')

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logger.debug('DEBUG mode activated')

    # Each raw file must exist inside the source directory; abort with exit
    # status 1 on the first one that is missing.
    states_path = os.path.join(args.source, args.states)
    if not os.path.exists(states_path):
        logger.critical('can\'t find source file for states')
        sys.exit(1)
    states = import_states_csv(states_path)
    logger.debug(states)

    departments_path = os.path.join(args.source, args.departments)
    if not os.path.exists(departments_path):
        logger.critical('can\'t find source file for departments')
        sys.exit(1)
    departments = import_department_csv(departments_path)
    logger.debug(departments)

    towns_path = os.path.join(args.source, args.towns)
    if not os.path.exists(towns_path):
        # Previously reported "departments" here because of a copy-paste slip.
        logger.critical('can\'t find source file for towns')
        sys.exit(1)
    towns = import_towns_csv(towns_path)
    logger.debug(towns)

    sys.exit()
|