datasus_db.datasources.ibge_pop

 1import polars as pl
 2import os.path as path
 3import logging
 4from ..pl_utils import to_schema, Column
 5from ..views.ibge_piramide_etaria import create_piramide_etaria_view
 6from ..datasus import import_from_ftp
 7from ..utils import format_year
 8from ..ftp import fetch_from_zip
 9
10MAIN_TABLE = "IBGE_POP"
11
12
def import_ibge_pop(db_file="datasus.db", years=None):
    """Import IBGE population by age and sex per city.

    Args:
        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. When omitted or `None`, `["*"]` is used.

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
    """
    # A None sentinel avoids sharing one mutable list object across calls.
    if years is None:
        years = ["*"]

    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    # One FTP pattern per requested year; each year goes through format_year
    # before being interpolated into the archive name.
    ftp_patterns = [
        f"/dissemin/publicos/IBGE/POP/POPBR{format_year(year)}.zip*"
        for year in years
    ]

    import_from_ftp(
        [MAIN_TABLE],
        ftp_patterns,
        fetch_ibge_pop,
        db_file=db_file,
    )

    # NOTE(review): called without arguments, so `db_file` is not forwarded
    # here — confirm the view helper targets the same database file.
    create_piramide_etaria_view()
40
41
def fetch_ibge_pop(ftp_path: str):
    """Fetch one IBGE population archive and return its mapped table.

    The CSV looked up inside the archive is named after the archive itself:
    the part of the base name before the first "." plus ".csv".

    Args:
        ftp_path (str): path of the zip archive, handed to `fetch_from_zip`.

    Returns:
        dict: a single entry mapping `MAIN_TABLE` to the output of `map_ibge_pop`.
    """
    stem, *_ = path.basename(ftp_path).split(".")
    csv_name = f"{stem}.csv"

    extracted = fetch_from_zip(ftp_path, [csv_name])

    # FXETARIA is read as text because it is remapped as a string before
    # being cast to an integer in map_ibge_pop.
    csv_schema = {
        "MUNIC_RES": pl.UInt32,
        "ANO": pl.UInt32,
        "SEXO": pl.UInt32,
        "SITUACAO": pl.UInt32,
        "FXETARIA": pl.Utf8,
        "POPULACAO": pl.UInt32,
    }
    raw = pl.read_csv(extracted[csv_name], schema=csv_schema)

    return {MAIN_TABLE: map_ibge_pop(raw)}
58
59
def map_ibge_pop(df: pl.DataFrame):
    """Split the raw `FXETARIA` code into numeric bounds and apply the final schema.

    The codes `"I000"` and `"R000"` are replaced by `"-100"`, then `FXETARIA`
    is cast to an integer. Its value divided by 100 becomes `INICIO_FXETARIA`
    and its remainder modulo 100 becomes `FIM_FXETARIA`, so the replaced codes
    end up as `-1` and `0`.

    Args:
        df (pl.DataFrame): raw data frame; it must contain `FXETARIA` as text and is expected to contain the other columns listed in the schema below.

    Returns:
        The result of `to_schema` for the listed columns.
    """
    age_code = pl.col("FXETARIA")

    df = (
        df.with_columns(
            # pl.lit marks the replacement as a string literal; a bare "-100"
            # passed to `then` can be parsed as a column name.
            pl.when(age_code.is_in(["I000", "R000"]))
            .then(pl.lit("-100"))
            .otherwise(age_code)
            .alias("FXETARIA"),
        )
        .with_columns(
            age_code.cast(pl.Int64),
        )
        .with_columns(
            # No special case is needed for 0: 0 // 100 and 0 % 100 are both 0.
            (age_code // 100).alias("INICIO_FXETARIA"),
            (age_code % 100).alias("FIM_FXETARIA"),
        )
    )

    return to_schema(
        df,
        [
            Column("MUNIC_RES", pl.UInt32),
            Column("ANO", pl.UInt16),
            Column("SEXO", pl.UInt8),
            Column("SITUACAO", pl.UInt8),
            Column("INICIO_FXETARIA", pl.Int8),
            Column("FIM_FXETARIA", pl.Int8),
            Column("POPULACAO", pl.UInt32),
        ],
    )
MAIN_TABLE = 'IBGE_POP'
def import_ibge_pop(db_file="datasus.db", years=None):
    """Import IBGE population by age and sex per city.

    Args:
        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. When omitted or `None`, `["*"]` is used.

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
    """
    # A None sentinel avoids sharing one mutable list object across calls.
    if years is None:
        years = ["*"]

    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    # One FTP pattern per requested year; each year goes through format_year
    # before being interpolated into the archive name.
    ftp_patterns = [
        f"/dissemin/publicos/IBGE/POP/POPBR{format_year(year)}.zip*"
        for year in years
    ]

    import_from_ftp(
        [MAIN_TABLE],
        ftp_patterns,
        fetch_ibge_pop,
        db_file=db_file,
    )

    # NOTE(review): called without arguments, so `db_file` is not forwarded
    # here — confirm the view helper targets the same database file.
    create_piramide_etaria_view()

Import IBGE population by age and sex per city. https://google.com

Arguments:
  • db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
  • years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].

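Extra:
  • Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
  • ftp path: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip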

def fetch_ibge_pop(ftp_path: str):
    """Fetch one IBGE population archive and return its mapped table.

    The CSV looked up inside the archive is named after the archive itself:
    the part of the base name before the first "." plus ".csv".

    Args:
        ftp_path (str): path of the zip archive, handed to `fetch_from_zip`.

    Returns:
        dict: a single entry mapping `MAIN_TABLE` to the output of `map_ibge_pop`.
    """
    stem, *_ = path.basename(ftp_path).split(".")
    csv_name = f"{stem}.csv"

    extracted = fetch_from_zip(ftp_path, [csv_name])

    # FXETARIA is read as text because it is remapped as a string before
    # being cast to an integer in map_ibge_pop.
    csv_schema = {
        "MUNIC_RES": pl.UInt32,
        "ANO": pl.UInt32,
        "SEXO": pl.UInt32,
        "SITUACAO": pl.UInt32,
        "FXETARIA": pl.Utf8,
        "POPULACAO": pl.UInt32,
    }
    raw = pl.read_csv(extracted[csv_name], schema=csv_schema)

    return {MAIN_TABLE: map_ibge_pop(raw)}
def map_ibge_pop(df: pl.DataFrame):
    """Split the raw `FXETARIA` code into numeric bounds and apply the final schema.

    The codes `"I000"` and `"R000"` are replaced by `"-100"`, then `FXETARIA`
    is cast to an integer. Its value divided by 100 becomes `INICIO_FXETARIA`
    and its remainder modulo 100 becomes `FIM_FXETARIA`, so the replaced codes
    end up as `-1` and `0`.

    Args:
        df (pl.DataFrame): raw data frame; it must contain `FXETARIA` as text and is expected to contain the other columns listed in the schema below.

    Returns:
        The result of `to_schema` for the listed columns.
    """
    age_code = pl.col("FXETARIA")

    df = (
        df.with_columns(
            # pl.lit marks the replacement as a string literal; a bare "-100"
            # passed to `then` can be parsed as a column name.
            pl.when(age_code.is_in(["I000", "R000"]))
            .then(pl.lit("-100"))
            .otherwise(age_code)
            .alias("FXETARIA"),
        )
        .with_columns(
            age_code.cast(pl.Int64),
        )
        .with_columns(
            # No special case is needed for 0: 0 // 100 and 0 % 100 are both 0.
            (age_code // 100).alias("INICIO_FXETARIA"),
            (age_code % 100).alias("FIM_FXETARIA"),
        )
    )

    return to_schema(
        df,
        [
            Column("MUNIC_RES", pl.UInt32),
            Column("ANO", pl.UInt16),
            Column("SEXO", pl.UInt8),
            Column("SITUACAO", pl.UInt8),
            Column("INICIO_FXETARIA", pl.Int8),
            Column("FIM_FXETARIA", pl.Int8),
            Column("POPULACAO", pl.UInt32),
        ],
    )