Skip to content

Parsers

extract_info(context, pkg_dict, template_length, tokens)

Creates a simplified dictionary for use in a tweet string template.

Parameters:

Name Type Description Default
context

The current context.

required
pkg_dict

The package information to be simplified.

required
template_length

The length of the text in the template (without any of the tokens).

required
tokens

A list of token names in the template (extracted via regex).

required

Returns:

Type Description

dict

Source code in ckanext/twitter/lib/parsers.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def extract_info(context, pkg_dict, template_length, tokens):
    """
    Builds a flat dictionary of package values for rendering a tweet template.

    Scalar values are copied unchanged (``None`` values are dropped), lists and dicts
    are replaced by their length, ``records`` and ``author`` are computed by dedicated
    helpers, and every other token used by the template is converted to a stripped
    string and shortened so that the tokens share the characters left over from
    ``tweet_limit`` once the template text itself is accounted for.

    :param context: The current context.
    :param pkg_dict: The package information to be simplified.
    :param template_length: The length of the text in the template (without any of the tokens).
    :param tokens: A list of token names in the template (extracted via regex).
    :return: dict
    """
    # tokens that have their own shortening rules and are never passed to truncate_field
    special_tokens = ['records', 'author']

    # split the package into plain values and container sizes in a single pass; the
    # sizes are merged afterwards so plain values keep their position at the front
    simplified = {}
    container_sizes = {}
    for key, value in pkg_dict.items():
        if isinstance(value, (list, dict)):
            container_sizes[key] = len(value)
        elif value is not None:
            simplified[key] = value
    simplified.update(container_sizes)

    # fields with bespoke handling
    simplified['records'] = get_number_records(context, pkg_dict['id'])
    simplified['author'] = truncate_author(simplified.get('author', 'Anon.'))

    # every remaining template token that we actually have a value for
    truncatable = [
        name for name in tokens if name not in special_tokens and name in simplified
    ]

    # characters available to all tokens, and how many the special tokens already use
    budget = tweet_limit - template_length
    used = sum(len(str(simplified[name])) for name in tokens if name in special_tokens)

    for position, name in enumerate(truncatable):
        # share what is left evenly between the fields that are still to be processed
        fields_left = len(truncatable) - position
        allowance = math.floor((budget - used) / fields_left)
        text = str(simplified.get(name, '')).strip()
        if len(text) > allowance:
            text = truncate_field(text, allowance)
        simplified[name] = text
        used += len(text)
    return simplified

generate_tweet(context, pkg_id, is_new, force_truncate=True)

Generates a standard tweet based on template values in the config. Does not post the tweet; just generates and returns the text.

Parameters:

Name Type Description Default
context

The current context.

required
pkg_id

The ID of the package to tweet about.

required
is_new

True if the package has only just been created or given its first resource, False if it's being updated.

required
force_truncate

If True, enforces an extra check at the end to ensure the text is below 140 characters. This should not be necessary as other methods account for this, but this is an optional final check.

True

Returns:

Type Description

str

Source code in ckanext/twitter/lib/parsers.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def generate_tweet(context, pkg_id, is_new, force_truncate=True):
    """
    Generates a standard tweet based on template values in the config. Does not post the
    tweet; just generates and returns the text.

    :param context: The current context.
    :param pkg_id: The ID of the package to tweet about.
    :param is_new: True if the package has only just been created or given
    its first resource, False if it's being updated.
    :param force_truncate: If True, enforces an extra check at the end to
    ensure the text is no longer than ``tweet_limit`` characters. This should
    not be necessary as other methods account for this, but this is an
    optional final check.
    :return: str, or None if the package is private (no tweet is generated)
    """
    pkg = toolkit.get_action('package_show')(context, {'id': pkg_id})
    # private packages are never tweeted about
    if pkg.get('private', False):
        return None
    format_string = (
        config_helpers.twitter_new_format()
        if is_new
        else config_helpers.twitter_updated_format()
    )
    # Collect the leading identifier of every "{{ name ... }}" expression. The pattern
    # is written as a raw string so that "\w" reaches the regex engine without relying
    # on Python's deprecated handling of unknown string escapes.
    tokens = re.findall(r'(?:{{ )(\w+)(?:(?:|.+?)? }})', format_string)
    template = Environment().from_string(format_string)
    # str(template.module) is the template rendered without any context, which is
    # used here as the length of the fixed (non-token) text
    simplified_dict = extract_info(context, pkg, len(str(template.module)), tokens)
    rendered = template.render(simplified_dict)
    # extra check to make sure the tweet isn't too long
    if force_truncate and len(rendered) > tweet_limit:
        rendered = rendered[:tweet_limit]
    return rendered

get_number_records(context, pkg_id)

Counts the total number of records associated with a package.

Parameters:

Name Type Description Default
context

The current context.

required
pkg_id

The package ID.

required

Returns:

Type Description

int

Source code in ckanext/twitter/lib/parsers.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def get_number_records(context, pkg_id):
    """
    Sums the datastore record totals of every resource attached to a package.

    Resources whose datastore lookup raises ``toolkit.ObjectNotFound`` or
    ``SearchIndexError`` are skipped and contribute nothing to the total.

    :param context: The current context.
    :param pkg_id: The package ID.
    :return: int
    """
    package = toolkit.get_action('package_show')(context, {'id': pkg_id})
    resources = package.get('resources', None)
    # a missing or empty resource list means there is nothing to count
    if not resources:
        return 0
    record_count = 0
    for resource_id in [resource['id'] for resource in resources]:
        try:
            search_result = toolkit.get_action('datastore_search')(
                context, {'resource_id': resource_id}
            )
            record_count += search_result.get('total', 0)
        except (toolkit.ObjectNotFound, SearchIndexError):
            # best effort: this resource cannot be searched, so move on
            continue
    return record_count

truncate_author(author)

Shortens the author field using regular expressions.

Parameters:

Name Type Description Default
author

The full author string.

required

Returns:

Type Description

str

Source code in ckanext/twitter/lib/parsers.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def truncate_author(author):
    """
    Shortens the author field using regular expressions.

    With no ``,`` or ``;`` separators the string is treated as a single name and its
    last whitespace-separated part is returned. Otherwise the text before the first
    separator is taken as the first author and ``et al.`` is appended; if only one
    separator style occurs in the string, that first author is further reduced to its
    last whitespace-separated part.

    :param author: The full author string.
    :return: str
    """
    # raw strings so "\s" is passed to the regex engine rather than being treated as
    # an (invalid) string escape
    sep_rgx = r'\s?[,;]\s?'
    name_sep_rgx = r'(?<=[^,;])\s'
    separators = set(re.findall(sep_rgx, author))
    if not separators:
        # re.split returns [author] when there is no whitespace, so [-1] covers both
        # the single-word and the multi-word case
        return re.split(name_sep_rgx, author)[-1]
    first_author = re.split(sep_rgx, author)[0]
    if len(separators) == 1:
        # take the last part rather than index 1: a first author without whitespace
        # (e.g. "Smith, John") has no index 1, and a three-part name should yield the
        # final part, matching the no-separator branch above
        first_author = re.split(name_sep_rgx, first_author)[-1]
    return f'{first_author} et al.'

truncate_field(value, char_limit)

Shortens the given value to a length equal to or less than the character limit and appends a continuation marker.

Parameters:

Name Type Description Default
value

The value to be truncated.

required
char_limit

The maximum number of characters in the output string.

required

Returns:

Type Description

str

Source code in ckanext/twitter/lib/parsers.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def truncate_field(value, char_limit):
    """
    Shortens the given value and appends a continuation marker.

    Values containing spaces are cut at a word boundary: whole words are kept for as
    long as the words, the spaces between them and the marker stay below the character
    limit. Values without spaces are cut mid-string so that the kept text plus the
    marker is at most ``char_limit`` characters long. If the limit is smaller than the
    marker itself, only the marker is returned.

    :param value: The value to be truncated.
    :param char_limit: The maximum number of characters in the output string.
    :return: str
    """
    marker = '[...]'
    if ' ' in value:
        kept = []
        # running length of the kept words, each counted with one trailing space
        used = 0
        for word in value.split(' '):
            if used + len(word) + len(marker) >= char_limit:
                break
            kept.append(word)
            used += len(word) + 1
        return ' '.join(kept) + marker
    # clamp at zero: a negative slice bound would count from the end of the string
    # and keep most of the value instead of dropping it
    keep = max(char_limit - len(marker), 0)
    return value[:keep] + marker