From ae463b854e91a64b3a4be83d5c6fa6b1161af5c7 Mon Sep 17 00:00:00 2001
From: Yorick Barbanneau
Date: Tue, 3 May 2022 00:14:45 +0200
Subject: [PATCH 1/5] Add script to display statistics

Add get_states_statistics.py. The script looks up a region by its
`libelle` or `ncc`, then prints its total surface, its population and
its ten most populated cities.
---
 get_states_statistics.py | 162 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)
 create mode 100755 get_states_statistics.py

diff --git a/get_states_statistics.py b/get_states_statistics.py
new file mode 100755
index 0000000..60ca836
--- /dev/null
+++ b/get_states_statistics.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+import sys
+import io
+import re
+import psycopg2 as pg
+from psycopg2.extensions import AsIs
+import logging
+import argparse as arg
+from classes.Timer import Timer
+from classes.CustomFormater import CustomFormatter
+import locale
+locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')
+
+# Schema name is NAMEDATALEN-1 (PGSQL source code)
+# -> src/include/pg_config_manual.h
+def check_schema_name(arg_value, pat=re.compile(r"^[a-z0-9A-Z]{1,63}$")):
+    if not pat.match(arg_value):
+        raise ValueError
+    return arg_value
+
+def parse_args():
+    """
+    Parse arguments
+    """
+    parser = arg.ArgumentParser('Process csv files from INSEE')
+    parser.add_argument('--state', '-s',
+            help='State name (libelle or ncc) to display statistics for',
+            required=True
+            )
+
+    parser.add_argument('--connection-file', '-f',
+            help='Postgresql connexion file',
+            default='.pgconn'
+            )
+
+
+    parser.add_argument('--schema-name',
+            help='Database schema name',
+            type=check_schema_name,
+            default='insee'
+            )
+    debug_group = parser.add_mutually_exclusive_group()
+    debug_group.add_argument('--verbose', '-V',
+            help='Verbose output',
+            action='store_true')
+    debug_group.add_argument('--debug', '-d',
+            help='Activate debug mode',
+            action='store_true')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+
+    args = parse_args()
+
+    #logging.basicConfig(level=logging.DEBUG)
+    logger = logging.getLogger()
+    tty_handler = logging.StreamHandler()
+
+    # create console handler with a higher log level
+    tty_handler.setFormatter(CustomFormatter())
+    logger.addHandler(tty_handler)
+
+    if args.verbose is True:
+        logger.setLevel(logging.INFO)
+        logger.info('VERBOSE mode activated')
+
+    if args.debug is True:
+        logger.setLevel(logging.DEBUG)
+        logger.debug('DEBUG mode activated')
+
+    t = Timer(logger=logger.info)
+
+    logging.debug('Import pgsql connection file {}'.format(args.connection_file))
+    with open(args.connection_file) as cf:
+        pg_conn = cf.read()
+
+    t.start('Get states')
+    conn = pg.connect(pg_conn)
+    with conn.cursor() as cur:
+        cur.execute(
+            """SELECT reg, libelle
+            FROM %(schema)s.region WHERE libelle = %(state)s
+            OR ncc = %(state)s;""",
+            {'state': args.state, 'schema': AsIs(args.schema_name)}
+        )
+        try:
+            s_id, s_name = cur.fetchone()
+        except Exception as e:
+            logging.error('There is no state {}'.format(args.state))
+            sys.exit(1)
+
+    t.stop()
+
+    print('Get information about {} - id: {}'.format(s_name, s_id))
+    print('---')
+
+    with conn.cursor() as cur:
+        cur.execute("""
+            SELECT sum(c.superf)
+            FROM %(schema)s.commune c
+            INNER JOIN %(schema)s.departement d ON c.dep = d.dep
+            WHERE d.reg = %(state)s
+            """,
+            {'state': s_id, 'schema': AsIs(args.schema_name)}
+        )
+        try:
+            surface = cur.fetchone()
+            logger.debug(surface)
+        except Exception as e:
+            logging.error('There is no response for {} surface'.format(args.state))
+            sys.exit(1)
+
+    print('surface: {:n}Km2'.format(surface[0]))
+
+    with conn.cursor() as cur:
+        cur.execute("""
+            SELECT sum(s.valeur)::numeric::integer
+            FROM %(schema)s.commune c
+            INNER JOIN %(schema)s.departement d ON c.dep = d.dep
+            INNER JOIN %(schema)s.statistique s ON c.com = s.com
+            WHERE s.id_indicateur = 1
+            AND s.date_debut = 2018
+            AND d.reg = %(state)s;
+            """,
+            {'state': s_id, 'schema': AsIs(args.schema_name)}
+        )
+        try:
+            inhabitants = cur.fetchone()
+            logger.debug(inhabitants)
+        except Exception as e:
+            logging.error('There is no response for {} population'.format(args.state))
+            sys.exit(1)
+
+    print('Population: {:n} inhabitants'.format(inhabitants[0]))
+
+    with conn.cursor() as cur:
+        # get most populated city in state
+        # need to cast float to int (valeur)
+        cur.execute("""
+            SELECT c.libelle, s.valeur::numeric::integer
+            FROM %(schema)s.commune c
+            INNER JOIN %(schema)s.departement d ON c.dep = d.dep
+            INNER JOIN %(schema)s.statistique s ON c.com = s.com
+            WHERE s.id_indicateur = 1
+            AND s.date_debut = 2018
+            AND d.reg = %(state)s
+            ORDER BY s.valeur DESC
+            LIMIT 10;
+            """,
+            {'state': s_id, 'schema': AsIs(args.schema_name)}
+        )
+        try:
+            towns = cur.fetchall()
+            logger.debug(towns)
+        except Exception as e:
+            logging.error('There is no response for {} cities'.format(args.state))
+            sys.exit(1)
+    print('Most populated cities:\n')
+    for row in towns:
+        print('\t{:.<40}{:.>10n}'.format(*row))
+
+    sys.exit(0)
From d1ea569bcdd727f1d2e27f3d6bad21f0d596ce40 Mon Sep 17 00:00:00 2001
From: Yorick Barbanneau
Date: Tue, 3 May 2022 00:25:18 +0200
Subject: [PATCH 2/5] Add --year parameter

Replace the hard-coded 2018 in the population and top-cities queries
with the value of a new --year option. The option takes one of a fixed
list of years, passed as strings, and defaults to '2018'.
---
 get_states_statistics.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/get_states_statistics.py b/get_states_statistics.py
index 60ca836..2465c21 100755
--- a/get_states_statistics.py
+++ b/get_states_statistics.py
@@ -33,7 +33,12 @@ def parse_args():
             default='.pgconn'
             )
 
+    parser.add_argument('--year',
+            help='Specify year needed to display statistics',
+            choices=['1982', '1990', '1999', '2008', '2013', '2018'],
+            default='2018'
+            )
 
     parser.add_argument('--schema-name',
             help='Database schema name',
             type=check_schema_name,
@@ -101,7 +106,7 @@ if __name__ == '__main__':
             INNER JOIN %(schema)s.departement d ON c.dep = d.dep
             WHERE d.reg = %(state)s
             """,
-            {'state': s_id, 'schema': AsIs(args.schema_name)}
+            {'state': s_id, 'schema': AsIs(args.schema_name), 'year': args.year}
         )
         try:
             surface = cur.fetchone()
@@ -119,10 +124,10 @@ if __name__ == '__main__':
             INNER JOIN %(schema)s.departement d ON c.dep = d.dep
             INNER JOIN %(schema)s.statistique s ON c.com = s.com
             WHERE s.id_indicateur = 1
-            AND s.date_debut = 2018
+            AND s.date_debut = %(year)s
             AND d.reg = %(state)s;
             """,
-            {'state': s_id, 'schema': AsIs(args.schema_name)}
+            {'state': s_id, 'schema': AsIs(args.schema_name), 'year': args.year}
         )
         try:
             inhabitants = cur.fetchone()
@@ -142,12 +147,12 @@ if __name__ == '__main__':
             INNER JOIN %(schema)s.departement d ON c.dep = d.dep
             INNER JOIN %(schema)s.statistique s ON c.com = s.com
             WHERE s.id_indicateur = 1
-            AND s.date_debut = 2018
+            AND s.date_debut = %(year)s
             AND d.reg = %(state)s
             ORDER BY s.valeur DESC
             LIMIT 10;
             """,
-            {'state': s_id, 'schema': AsIs(args.schema_name)}
+            {'state': s_id, 'schema': AsIs(args.schema_name), 'year': args.year}
         )
         try:
             towns = cur.fetchall()
From 3c5d5dfc075b8d77b55088e7b4463dbfb1cc53fb Mon Sep 17 00:00:00 2001
From: Yorick Barbanneau
Date: Tue, 3 May 2022 00:26:25 +0200
Subject: [PATCH 3/5] Correction on PSQL Procedure

Select id_departement instead of id_indicateur in the query driving the
procedure's FOR loop.
---
 createdatabase.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/createdatabase.py b/createdatabase.py
index 7303474..6ae117c 100755
--- a/createdatabase.py
+++ b/createdatabase.py
@@ -184,7 +184,7 @@ if __name__ == '__main__':
                 REC RECORD;
 
             BEGIN
-                FOR REC IN (SELECT id_indicateur, ncc, SUM valeur
+                FOR REC IN (SELECT id_departement, ncc, SUM valeur
                     FROM %(schema)s.view_indicateur_dep v
                     WHERE id_indicateur = 1
                     AND date_debut = '2018') LOOP
From 131ec5f070a0bfa7a02f621d03ba82a26bb3cdcf Mon Sep 17 00:00:00 2001
From: Yorick Barbanneau
Date: Tue, 3 May 2022 00:50:12 +0200
Subject: [PATCH 4/5] Add update trigger on department and state table

Create the function block_maj_reg_dep(), which raises an exception, and
attach it as a BEFORE INSERT OR UPDATE OR DELETE row trigger on the
region and departement tables.
---
 createdatabase.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/createdatabase.py b/createdatabase.py
index 6ae117c..7b87b08 100755
--- a/createdatabase.py
+++ b/createdatabase.py
@@ -244,4 +244,30 @@ if __name__ == '__main__':
             )
         conn.commit()
     t.stop()
+
+    t.start('Add Modify Trigger')
+    with conn.cursor() as curs:
+        curs.execute("""
+            CREATE OR REPLACE FUNCTION %(schema)s.block_maj_reg_dep()
+                RETURNS TRIGGER AS $bloquage$
+            BEGIN
+                RAISE EXCEPTION
+                    'Mise à jour non autorisé';
+            END;
+            $bloquage$ language plpgsql;
+
+            CREATE TRIGGER TRG_BLOQ_MAJ_REG
+                BEFORE INSERT OR UPDATE OR DELETE ON %(schema)s.region
+
+                FOR EACH ROW EXECUTE PROCEDURE %(schema)s.block_maj_reg_dep();
+
+            CREATE TRIGGER TRG_BLOQ_MAJ_DEP
+                BEFORE INSERT OR UPDATE OR DELETE ON %(schema)s.departement
+
+                FOR EACH ROW EXECUTE PROCEDURE %(schema)s.block_maj_reg_dep();
+            """,
+            {'schema':AsIs(args.schema_name)})
+        conn.commit()
+
+    t.stop()
     conn.close()
From 87402c06fc55c0767579a1c0a8b3e5b18acb5b13 Mon Sep 17 00:00:00 2001
From: Yorick Barbanneau
Date: Tue, 3 May 2022 00:56:01 +0200
Subject: [PATCH 5/5] Call department and region procedure after import data

Call PRC_POP_REG_DEP() before the 'Add Modify Trigger' step, which
creates the blocking triggers on region and departement.
---
 createdatabase.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/createdatabase.py b/createdatabase.py
index 7b87b08..f0317c9 100755
--- a/createdatabase.py
+++ b/createdatabase.py
@@ -245,6 +245,13 @@ if __name__ == '__main__':
         conn.commit()
     t.stop()
 
+    t.start('Call procedure')
+    with conn.cursor() as curs:
+        curs.execute("CALL %(schema)s.PRC_POP_REG_DEP()",
+            {'schema':AsIs(args.schema_name)}
+            )
+    t.stop()
+
     t.start('Add Modify Trigger')
     with conn.cursor() as curs:
         curs.execute("""