datasus_db.datasources.ibge_pop_tcu
"""Import of the IBGE_POP_TCU table (population estimated per city by TCU)."""

import polars as pl
import os.path as path
import logging
from ..pl_utils import to_schema, Column
from ..dbf import read_as_df
from ..datasus import import_from_ftp
from ..utils import format_year
from ..ftp import fetch_from_zip

# Name of the table that every function in this module produces.
MAIN_TABLE = "IBGE_POP_TCU"


def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]):
    """Import the population estimated per city by TCU (Tribunal de Contas da União).

    Args:
        db_file (str, optional): path to the duckdb file into which the data is imported. Defaults to "datasus.db".
        years (list, optional): years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
    """
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    # One FTP pattern per requested year; the default "*" entry is passed
    # through format_year like any other value.
    ftp_patterns = []
    for year in years:
        ftp_patterns.append(
            f"/dissemin/publicos/IBGE/POPTCU/POPTBR{format_year(year)}.zip*"
        )

    import_from_ftp([MAIN_TABLE], ftp_patterns, fetch_ibge_pop_tcu, db_file)


def fetch_ibge_pop_tcu(ftp_path: str):
    """Fetch one zip from the FTP server and return its mapped table.

    Args:
        ftp_path (str): FTP path of the zip file to fetch.

    Returns:
        dict: a single entry keyed by `MAIN_TABLE` holding the mapped data.
    """
    # The DBF looked up in the archive is named after the zip: everything
    # before the first "." of the zip's base name, plus ".dbf".
    zip_stem = path.basename(ftp_path).split(".")[0]
    dbf_name = zip_stem + ".dbf"

    # NOTE(review): fetch_from_zip presumably returns the requested members
    # keyed by file name -- confirm against ..ftp.
    extracted = fetch_from_zip(ftp_path, [dbf_name])
    raw_df = read_as_df(dbf_name, extracted[dbf_name])

    return {MAIN_TABLE: map_ibge_pop_tcu(raw_df)}


def map_ibge_pop_tcu(df: pl.DataFrame):
    """Apply the IBGE_POP_TCU schema and normalise the `MUNIC_RES` column.

    Args:
        df (pl.DataFrame): raw data read from the DBF file.

    Returns:
        The result of `to_schema` with `MUNIC_RES` values of 1_000_000 or
        more integer-divided by 10; smaller values are kept unchanged.
    """
    schema = [
        Column("MUNIC_RES", pl.UInt32),
        Column("ANO", pl.UInt16),
        Column("POPULACAO", pl.UInt32),
    ]
    typed = to_schema(df, schema)

    # Codes with seven or more digits lose their last digit.
    # NOTE(review): presumably this strips the IBGE check digit -- confirm.
    city_code = pl.col("MUNIC_RES")
    shortened = (
        pl.when(city_code >= 1_000_000)
        .then(city_code // 10)
        .otherwise(city_code)
        .name.keep()
    )
    return typed.with_columns(shortened)
MAIN_TABLE =
'IBGE_POP_TCU'
def
import_ibge_pop_tcu(db_file='datasus.db', years=['*']):
def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]):
    """Import the population estimated per city by TCU (Tribunal de Contas da União).

    Args:
        db_file (str, optional): path to the duckdb file into which the data is imported. Defaults to "datasus.db".
        years (list, optional): years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
    """
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    # One FTP pattern per requested year; the default "*" entry is passed
    # through format_year like any other value.
    ftp_patterns = []
    for year in years:
        ftp_patterns.append(
            f"/dissemin/publicos/IBGE/POPTCU/POPTBR{format_year(year)}.zip*"
        )

    import_from_ftp([MAIN_TABLE], ftp_patterns, fetch_ibge_pop_tcu, db_file)
Import population estimated per city by TCU (Tribunal de Contas da União).
Arguments:
- db_file (str, optional): path to the DuckDB file into which the data will be imported. Defaults to "datasus.db".
- years (list, optional): list of years for which data will be imported (if available). E.g. `[2012, 2000, 2010]`. Defaults to `["*"]`.
Extra:
- Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
def
fetch_ibge_pop_tcu(ftp_path: str):
def
map_ibge_pop_tcu(df: polars.dataframe.frame.DataFrame):
def map_ibge_pop_tcu(df: pl.DataFrame):
    """Apply the IBGE_POP_TCU schema and normalise the `MUNIC_RES` column.

    Args:
        df (pl.DataFrame): raw data read from the DBF file.

    Returns:
        The result of `to_schema` with `MUNIC_RES` values of 1_000_000 or
        more integer-divided by 10; smaller values are kept unchanged.
    """
    schema = [
        Column("MUNIC_RES", pl.UInt32),
        Column("ANO", pl.UInt16),
        Column("POPULACAO", pl.UInt32),
    ]
    typed = to_schema(df, schema)

    # Codes with seven or more digits lose their last digit.
    # NOTE(review): presumably this strips the IBGE check digit -- confirm.
    city_code = pl.col("MUNIC_RES")
    shortened = (
        pl.when(city_code >= 1_000_000)
        .then(city_code // 10)
        .otherwise(city_code)
        .name.keep()
    )
    return typed.with_columns(shortened)