datasus_db.datasources.ibge_pop_tcu

 1import polars as pl
 2import os.path as path
 3import logging
 4from ..pl_utils import to_schema, Column
 5from ..dbf import read_as_df
 6from ..datasus import import_from_ftp
 7from ..utils import format_year
 8from ..ftp import fetch_from_zip
 9
10MAIN_TABLE = "IBGE_POP_TCU"
11
12
def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]):
    """Import the per-city population estimates published by TCU (Tribunal de Contas da União).

    Args:
        db_file (str, optional): path to the duckdb file that receives the imported data. Defaults to "datasus.db".
        years (list, optional): years to import (when available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
    """
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    # One ftp path per requested year; each year is rendered by format_year.
    ftp_paths = [
        f"/dissemin/publicos/IBGE/POPTCU/POPTBR{format_year(year)}.zip*"
        for year in years
    ]
    target_tables = [MAIN_TABLE]

    import_from_ftp(target_tables, ftp_paths, fetch_ibge_pop_tcu, db_file)
37
38
def fetch_ibge_pop_tcu(ftp_path: str):
    """Load the dbf entry of the zip at `ftp_path` and return it mapped, keyed by table name.

    The dbf entry name is derived from the zip's file name: everything before
    the first "." followed by ".dbf".

    Args:
        ftp_path (str): path of the zip file, as passed on to `fetch_from_zip`.

    Returns:
        dict: a single entry, `MAIN_TABLE` -> result of `map_ibge_pop_tcu`.
    """
    zip_name = path.basename(ftp_path)
    # partition(".")[0] keeps only the text before the first dot.
    stem = zip_name.partition(".")[0]
    dbf_file = f"{stem}.dbf"

    extracted = fetch_from_zip(ftp_path, [dbf_file])
    raw_df = read_as_df(dbf_file, extracted[dbf_file])

    return {MAIN_TABLE: map_ibge_pop_tcu(raw_df)}
46
47
def map_ibge_pop_tcu(df: pl.DataFrame):
    """Apply the table's column schema to `df` and normalize long MUNIC_RES codes.

    Args:
        df (pl.DataFrame): frame holding the MUNIC_RES, ANO and POPULACAO columns.

    Returns:
        The frame produced by `to_schema`, with MUNIC_RES values of
        1_000_000 or more integer-divided by 10 and all other values kept.
    """
    schema = [
        Column("MUNIC_RES", pl.UInt32),
        Column("ANO", pl.UInt16),
        Column("POPULACAO", pl.UInt32),
    ]

    munic_res = pl.col("MUNIC_RES")
    # Codes >= 1_000_000 lose their last digit; presumably that is a check
    # digit on 7-digit city codes -- confirm against the data description.
    normalized_munic_res = (
        pl.when(munic_res >= 1_000_000)
        .then(munic_res // 10)
        .otherwise(munic_res)
        .name.keep()
    )

    return to_schema(df, schema).with_columns(normalized_munic_res)
MAIN_TABLE = 'IBGE_POP_TCU'
def import_ibge_pop_tcu(db_file='datasus.db', years=['*']):
14def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]):
15    """Import population estimated per city by TCU (Tribunal de Contas da União).
16
17    Args:
18        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
19        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
20
21    ---
22
23    Extra:
24    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
25    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
26    """
27    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")
28
29    import_from_ftp(
30        [MAIN_TABLE],
31        [
32            f"/dissemin/publicos/IBGE/POPTCU/POPTBR{format_year(year)}.zip*"
33            for year in years
34        ],
35        fetch_ibge_pop_tcu,
36        db_file,
37    )

Import population estimated per city by TCU (Tribunal de Contas da União).

Arguments:
  • db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
  • years (list, optional): list of years for which data will be imported (if available). Eg: [2012, 2000, 2010]. Defaults to ["*"].

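Extra:
  • Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
  • ftp path: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip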

def fetch_ibge_pop_tcu(ftp_path: str):
40def fetch_ibge_pop_tcu(ftp_path: str):
41    dbf_file = path.basename(ftp_path).split(".")[0] + ".dbf"
42    files = fetch_from_zip(ftp_path, [dbf_file])
43
44    df = read_as_df(dbf_file, files[dbf_file])
45
46    return {MAIN_TABLE: map_ibge_pop_tcu(df)}
def map_ibge_pop_tcu(df: polars.dataframe.frame.DataFrame):
49def map_ibge_pop_tcu(df: pl.DataFrame):
50    return to_schema(
51        df,
52        [
53            Column("MUNIC_RES", pl.UInt32),
54            Column("ANO", pl.UInt16),
55            Column("POPULACAO", pl.UInt32),
56        ],
57    ).with_columns(
58        pl.when(pl.col("MUNIC_RES") >= 1_000_000)
59        .then(pl.col("MUNIC_RES") // 10)
60        .otherwise(pl.col("MUNIC_RES"))
61        .name.keep()
62    )