datasus_db.datasources.ibge_pop
import polars as pl
import os.path as path
import logging
from ..pl_utils import to_schema, Column
from ..views.ibge_piramide_etaria import create_piramide_etaria_view
from ..datasus import import_from_ftp
from ..utils import format_year
from ..ftp import fetch_from_zip

# Name of the table this datasource populates.
MAIN_TABLE = "IBGE_POP"


def import_ibge_pop(db_file="datasus.db", years=["*"]):
    """Import IBGE population by age and sex per city.

    Builds one FTP glob per requested year, passes them to `import_from_ftp`
    with `fetch_ibge_pop` as the per-file fetcher, then calls
    `create_piramide_etaria_view`.

    Args:
        db_file (str, optional): path to the duckdb file into which the data will be imported. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). E.g. `[2012, 2000, 2010]`. Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
    """
    # NOTE(review): `years` has a mutable default; it is only iterated here,
    # never mutated, so the shared list is harmless as written.
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    ftp_globs = [
        f"/dissemin/publicos/IBGE/POP/POPBR{format_year(year)}.zip*"
        for year in years
    ]
    import_from_ftp([MAIN_TABLE], ftp_globs, fetch_ibge_pop, db_file=db_file)

    # NOTE(review): `db_file` is not forwarded to the view helper -- confirm
    # that it targets the same database when a non-default path is used.
    create_piramide_etaria_view()


def fetch_ibge_pop(ftp_path: str):
    """Fetch one POPBR zip archive and return its CSV as the IBGE_POP frame.

    The CSV member is expected to share the archive's base name (text before
    the first dot) with a `.csv` extension.

    Args:
        ftp_path (str): path of the zip archive on the FTP server.

    Returns:
        dict: `{MAIN_TABLE: DataFrame}` with the frame produced by
        `map_ibge_pop`.
    """
    stem = path.basename(ftp_path).partition(".")[0]
    csv_file = f"{stem}.csv"

    # presumably a mapping from member name to a readable source -- verify
    # against `fetch_from_zip`.
    files = fetch_from_zip(ftp_path, [csv_file])

    # FXETARIA stays text because it can carry non-numeric codes that
    # `map_ibge_pop` rewrites before casting.
    csv_schema = {
        "MUNIC_RES": pl.UInt32,
        "ANO": pl.UInt32,
        "SEXO": pl.UInt32,
        "SITUACAO": pl.UInt32,
        "FXETARIA": pl.Utf8,
        "POPULACAO": pl.UInt32,
    }
    raw = pl.read_csv(files[csv_file], schema=csv_schema)

    return {MAIN_TABLE: map_ibge_pop(raw)}


def map_ibge_pop(df: pl.DataFrame):
    """Split the FXETARIA age-band code into numeric start/end columns.

    Steps:
      1. Replace the non-numeric codes "I000" and "R000" with "-100" so the
         column can be cast to an integer.
      2. Cast FXETARIA to Int64.
      3. Derive INICIO_FXETARIA (`FXETARIA // 100`) and FIM_FXETARIA
         (`FXETARIA % 100`); the -100 sentinel therefore yields -1 and 0.

    Args:
        df (pl.DataFrame): frame with a string FXETARIA column plus the other
            columns listed in the final schema.

    Returns:
        The result of `to_schema` for the listed columns (presumably the
        frame selected/cast to those columns -- verify against `to_schema`).
    """
    age_band = pl.col("FXETARIA")

    df = (
        df.with_columns(
            pl.when(age_band.is_in(["I000", "R000"]))
            # Must be an explicit literal: a bare string passed to `then` is
            # parsed as a column name by current polars releases.
            .then(pl.lit("-100"))
            .otherwise(age_band)
            .name.keep(),
        )
        .with_columns(
            age_band.cast(pl.Int64),
        )
        .with_columns(
            pl.when(age_band == 0)
            .then(0)
            .otherwise(age_band // 100)
            .alias("INICIO_FXETARIA"),
            pl.when(age_band == 0)
            .then(0)
            .otherwise(age_band % 100)
            .alias("FIM_FXETARIA"),
        )
    )

    return to_schema(
        df,
        [
            Column("MUNIC_RES", pl.UInt32),
            Column("ANO", pl.UInt16),
            Column("SEXO", pl.UInt8),
            Column("SITUACAO", pl.UInt8),
            Column("INICIO_FXETARIA", pl.Int8),
            Column("FIM_FXETARIA", pl.Int8),
            Column("POPULACAO", pl.UInt32),
        ],
    )
MAIN_TABLE =
'IBGE_POP'
def
import_ibge_pop(db_file='datasus.db', years=['*']):
def import_ibge_pop(db_file="datasus.db", years=["*"]):
    """Import IBGE population by age and sex per city.

    Builds one FTP glob per requested year, passes them to `import_from_ftp`
    with `fetch_ibge_pop` as the per-file fetcher, then calls
    `create_piramide_etaria_view`.

    Args:
        db_file (str, optional): path to the duckdb file into which the data will be imported. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). E.g. `[2012, 2000, 2010]`. Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
    """
    # NOTE(review): `years` has a mutable default; it is only iterated here,
    # never mutated, so the shared list is harmless as written.
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    ftp_globs = [
        f"/dissemin/publicos/IBGE/POP/POPBR{format_year(year)}.zip*"
        for year in years
    ]
    import_from_ftp([MAIN_TABLE], ftp_globs, fetch_ibge_pop, db_file=db_file)

    # NOTE(review): `db_file` is not forwarded to the view helper -- confirm
    # that it targets the same database when a non-default path is used.
    create_piramide_etaria_view()
Import IBGE population by age and sex per city. https://google.com
Arguments:
- db_file (str, optional): path to the duckdb file into which the data will be imported. Defaults to "datasus.db".
- years (list, optional): list of years for which data will be imported (if available). E.g. `[2012, 2000, 2010]`. Defaults to `["*"]`.
Extra:
- Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
def
fetch_ibge_pop(ftp_path: str):
def fetch_ibge_pop(ftp_path: str):
    """Fetch one POPBR zip archive and return its CSV as the IBGE_POP frame.

    The CSV member is expected to share the archive's base name (text before
    the first dot) with a `.csv` extension.

    Args:
        ftp_path (str): path of the zip archive on the FTP server.

    Returns:
        dict: `{MAIN_TABLE: DataFrame}` with the frame produced by
        `map_ibge_pop`.
    """
    stem = path.basename(ftp_path).partition(".")[0]
    csv_file = f"{stem}.csv"

    # presumably a mapping from member name to a readable source -- verify
    # against `fetch_from_zip`.
    files = fetch_from_zip(ftp_path, [csv_file])

    # FXETARIA stays text because it can carry non-numeric codes that
    # `map_ibge_pop` rewrites before casting.
    csv_schema = {
        "MUNIC_RES": pl.UInt32,
        "ANO": pl.UInt32,
        "SEXO": pl.UInt32,
        "SITUACAO": pl.UInt32,
        "FXETARIA": pl.Utf8,
        "POPULACAO": pl.UInt32,
    }
    raw = pl.read_csv(files[csv_file], schema=csv_schema)

    return {MAIN_TABLE: map_ibge_pop(raw)}
def
map_ibge_pop(df: polars.dataframe.frame.DataFrame):
def map_ibge_pop(df: pl.DataFrame):
    """Split the FXETARIA age-band code into numeric start/end columns.

    Steps:
      1. Replace the non-numeric codes "I000" and "R000" with "-100" so the
         column can be cast to an integer.
      2. Cast FXETARIA to Int64.
      3. Derive INICIO_FXETARIA (`FXETARIA // 100`) and FIM_FXETARIA
         (`FXETARIA % 100`); the -100 sentinel therefore yields -1 and 0.

    Args:
        df (pl.DataFrame): frame with a string FXETARIA column plus the other
            columns listed in the final schema.

    Returns:
        The result of `to_schema` for the listed columns (presumably the
        frame selected/cast to those columns -- verify against `to_schema`).
    """
    age_band = pl.col("FXETARIA")

    df = (
        df.with_columns(
            pl.when(age_band.is_in(["I000", "R000"]))
            # Must be an explicit literal: a bare string passed to `then` is
            # parsed as a column name by current polars releases.
            .then(pl.lit("-100"))
            .otherwise(age_band)
            .name.keep(),
        )
        .with_columns(
            age_band.cast(pl.Int64),
        )
        .with_columns(
            pl.when(age_band == 0)
            .then(0)
            .otherwise(age_band // 100)
            .alias("INICIO_FXETARIA"),
            pl.when(age_band == 0)
            .then(0)
            .otherwise(age_band % 100)
            .alias("FIM_FXETARIA"),
        )
    )

    return to_schema(
        df,
        [
            Column("MUNIC_RES", pl.UInt32),
            Column("ANO", pl.UInt16),
            Column("SEXO", pl.UInt8),
            Column("SITUACAO", pl.UInt8),
            Column("INICIO_FXETARIA", pl.Int8),
            Column("FIM_FXETARIA", pl.Int8),
            Column("POPULACAO", pl.UInt32),
        ],
    )