3 changed files with 1 addition and 147 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +0,0 @@
-.venv/
-.ipynb_checkpoints/
--- a/create_db.py
+++ b/create_db.py
@@ -4,8 +4,6 @@ import io
 import numpy as np
 import sys
 import os
-import re
-import time
 import logging
 import argparse as arg

@@ -53,9 +51,6 @@ def parse_args():
    parser.add_argument('--states',
            help='states raw csv file (inside source follder)',
            default='region2021.csv')
-    parser.add_argument('--statistics',
-            help='statistics raw csv file to import',
-            default='statistiques.csv')
    debug_group = parser.add_mutually_exclusive_group()
    debug_group.add_argument('--verbose', '-V',
            help='Verbose output',
@@ -103,58 +98,6 @@ def import_towns_csv(raw_file):
    return towns.loc[towns['TYPECOM'] == 'COM', ['COM','NCC', 'LIBELLE', 'DEP']]


-
-def import_statistics_csv(raw_file):
-    """
-    Process stats files
-    """
-
-    logger.info('import town from {}'.format(raw_file))
-    stats = pd.read_csv(raw_file,
-        usecols=["CODGEO","SUPERF","P18_POP","P13_POP","P08_POP","D99_POP",
-            "NAIS1318","NAIS0813","NAIS9908","NAIS9099","NAIS8290","DECE1318",
-            "DECE0813","DECE9908","DECE9099","DECE8290","P18_LOG","P13_LOG",
-            "P08_LOG","D99_LOG","D90_LOG","D82_LOG", "P18_LOGVAC","P13_LOGVAC",
-            "P08_LOGVAC","D99_LOGVAC","D90_LOGVAC","D82_LOGVAC","P18_RP",
-            "P13_RP","P08_RP","D99_RP","D90_RP","D82_RP", "P18_RSECOCC",
-            "P13_RSECOCC","P08_RSECOCC","D99_RSECOCC","D90_RSECOCC",
-            "D82_RSECOCC"],
-            sep=';')
-    return stats
-
-def get_single_date(attr):
-    logger.debug('get a date from {}'.format(attr))
-    m = re.match('^[D,P]([0-9]{2}).*$', attr)
-    if m.group(1) is None:
-        logger.error('Cand determine single date from {}'.format(attr))
-
-    if m.group(1).startswith(tuple(['0','1','2'])):
-        return 2000 + int(m.group(1)), 'null'
-    else:
-        return 1900 + int(m.group(1)), 'null'
-
-def get_range_date(attr):
-    logger.debug('get date range from {}'.format(attr))
-    m = re.match('^[A-Z]*([0-9]{2})([0-9]{2}).*$', attr)
-    date = {}
-    try:
-        for i in [1,2]:
-            logger.debug('Process two digits: {}'.format(m.group(i)))
-            if m.group(i).startswith(tuple(['0','1','2'])):
-                date[i] = 2000 + int(m.group(i))
-            else:
-                date[i] = 1900 + int(m.group(i))
-    except Exception as e:
-        logger.error(
-            'Error when trying to determine daterange from {} - {}'.format(
-                attr,
-                e
-            )
-        )
-        return None, None
-
-    return date[1], date[2]
-
 if __name__ == '__main__':
    args = parse_args()
    
@@ -174,6 +117,7 @@ if __name__ == '__main__':
        logger.setLevel(logging.DEBUG)
        logger.debug('DEBUG mode activated')
    
+    
    if not os.path.exists(args.source + '/' + args.states):
        logger.critical('can\'t find source file for states')
        sys.exit(1)
@@ -193,92 +137,4 @@ if __name__ == '__main__':
    towns = import_towns_csv(args.source + '/' + args.towns)
    logger.debug(towns)
    
-    if not os.path.exists(args.source + '/' + args.statistics):
-        logger.critical('can\'t find source file for statistics')
-        sys.exit(1)
-    statistics = import_statistics_csv(args.source + '/' + args.statistics)
-    logger.debug(statistics)
-    
-    # Create missing table : indicators
-    indicators = pd.DataFrame({'indicateur': [
-        'population', 
-        'naissances',
-        'deces',
-        'logements',
-        'logements vacants',
-        'residences principales', 
-        'residences secondaires et logements occasionnels'
-    ],
-    'code': ['_POP','NAIS', 'DECE','_LOG', '_LOGVAC', '_RP',  '_RSECOCC']},
-        index=[1,2,3,4,5,6,7])
-    logger.debug(indicators)
-    
-    ## Create departments capitals
-    dep_capitals = departments[['CHEFLIEU','DEP']]
-    dep_capitals.columns = ["CHEFLIEUDEP","DEP"]
-    departments = departments[["DEP","NCC","LIBELLE","REG"]]
-    logger.debug(dep_capitals)
-
-    ## Create states capitals
-    states_capitals = states[['CHEFLIEU','REG']]
-    states_capitals.columns = ["CHEFLIEUREG","REG"]
-    departments = departments[["REG","NCC","LIBELLE"]]
-    logger.debug(states_capitals)
-    
-    ## create statistics dataframes
-    # 
-    # We need to first iterate on statistics
-    if args.verbose or arg.debug:
-        t_begin = time.time()
-        logger.info('BEGIN - import stats')
-        
-    c_stats = pd.DataFrame(columns = ['com','id_indicateur','date_debut',
-        'date_fin','valeur']
-    )
-    temp = {"com" : [], "id_indicateur" : [], 
-        "date_debut" : [], 
-        "date_fin" : [], 
-        "valeur" : []
-    }
-    for s_index,srow in statistics.iterrows():
-        for index, irow in indicators.iterrows():
-            if irow['code'].startswith('_'):
-                regex = irow['code'] + '$'
-            else:
-                regex = '^' + irow['code']
-
-            logger.debug('Process indicator {}'.format(regex))
-            selection = srow.filter(regex=regex)
-            for attribute, value in selection.items():
-                logger.debug('check code: {}'.format(irow['code']))
-                if irow['code'].startswith('_'):
-                    start,end = get_single_date(attribute)
-                else:
-                    start,end = get_range_date(attribute)
-                
-                if start is None or end is None:
-                   logger.error('Can\'t process line, continue to next')
-                   continue
-
-                logger.debug(
-                    'town:{}, id_indic: {}, start: {}, end: {}, value:{}'
-                    .format(
-                        srow['CODGEO'],
-                        index,
-                        start,
-                        end,
-                        value
-                    )
-                )
-
-                temp['com'].append(srow['CODGEO'])
-                temp['id_indicateur'].append(index)
-                temp['date_debut'].append(start)
-                temp['date_fin'].append(end)
-                temp['valeur'].append(value)
-
-    if args.verbose or arg.debug:
-        t_end = time.time()
-        logger.info('END stats import, time: {} seconds'.format(t_end - t_begin))
-
    sys.exit()
--- a/notebook.ipynb
+++ b/notebook.ipynb