Commit 8d1ddf24 authored by Paul Flowers's avatar Paul Flowers

initial commit

parent ff080a0c
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This AWS Lambda function allowed to delete the old Elasticsearch index
"""
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.credentials import create_credential_resolver
from botocore.session import get_session
from botocore.vendored.requests import Session
import sys
if sys.version_info[0] == 3:
from urllib.request import quote
else:
from urllib import quote
import datetime
import json
import time
import os
class ES_Exception(Exception):
"""Exception capturing status_code from Client Request"""
status_code = 0
payload = ""
def __init__(self, status_code, payload):
self.status_code = status_code
self.payload = payload
Exception.__init__(self,
"ES_Exception: status_code={}, payload={}".format(
status_code, payload))
class ES_Cleanup(object):
name = "lambda_es_cleanup"
def __init__(self, event, context):
"""Main Class init
Args:
event (dict): AWS Cloudwatch Scheduled Event
context (object): AWS running context
"""
self.report = []
self.event = event
self.context = context
self.cfg = {}
self.cfg["es_endpoint"] = os.environ.get("es_endpoint", None)
self.cfg["index"] = os.environ.get("index", "all").split(",")
self.cfg["delete_after"] = int(os.environ.get("delete_after", 15))
self.cfg["es_max_retry"] = int(os.environ.get("es_max_retry", 3))
self.cfg["index_format"] = os.environ.get("index_format", "%Y.%m.%d")
self.cfg["sns_alert"] = os.environ.get("sns_alert", "")
if not self.cfg["es_endpoint"]:
raise Exception("[es_endpoint] OS variable is not set")
def send_to_es(self, path, method="GET", payload={}):
"""Low-level POST data to Amazon Elasticsearch Service generating a Sigv4 signed request
Args:
path (str): path to send to ES
method (str, optional): HTTP method default:GET
payload (dict, optional): additional payload used during POST or PUT
Returns:
dict: json answer converted in dict
Raises:
#: Error during ES communication
ES_Exception: Description
"""
if not path.startswith("/"):
path = "/" + path
es_region = self.cfg["es_endpoint"].split(".")[1]
# send to ES with exponential backoff
retries = 0
while retries < int(self.cfg["es_max_retry"]):
if retries > 0:
seconds = (2**retries) * .1
# print('Waiting for %.1f seconds', seconds)
time.sleep(seconds)
req = AWSRequest(
method=method,
url="https://%s%s?pretty&format=json" % (self.cfg["es_endpoint"], quote(path)),
data=payload,
headers={'Host': self.cfg["es_endpoint"]})
credential_resolver = create_credential_resolver(get_session())
credentials = credential_resolver.load_credentials()
SigV4Auth(credentials, 'es', es_region).add_auth(req)
try:
preq = req.prepare()
session = Session()
res = session.send(preq)
if res.status_code >= 200 and res.status_code <= 299:
# print("%s %s" % (res.status_code, res.content))
return json.loads(res.content)
else:
raise ES_Exception(res.status_code, res._content)
except ES_Exception as e:
if (e.status_code >= 500) and (e.status_code <= 599):
retries += 1 # Candidate for retry
else:
raise # Stop retrying, re-raise exception
def send_error(self, msg):
"""Send SNS error
Args:
msg (str): error string
Returns:
None
"""
_msg = "[%s][%s] %s" % (self.name, self.cur_account, msg)
print(_msg)
if self.cfg["sns_alert"] != "":
sns_region = self.cfg["sns_alert"].split(":")[4]
sns = boto3.client("sns", region_name=sns_region)
response = sns.publish(
TopicArn=self.cfg["sns_alert"], Message=_msg)
def delete_index(self, index_name):
"""ES DELETE specific index
Args:
index_name (str): Index name
Returns:
dict: ES answer
"""
return self.send_to_es(index_name, "DELETE")
def get_indices(self):
"""ES Get indices
Returns:
dict: ES answer
"""
return self.send_to_es("/_cat/indices")
import boto3
from requests_aws4auth import AWS4Auth
from elasticsearch import Elasticsearch, RequestsHttpConnection
import curator
host = os.environ.get("es_endpoint", None)
region = os.environ.get("es_env", None)
service = 'es'
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)
# Lambda execution starts here.
def lambda_handler(event, context):
"""Main Lambda function
Args:
event (dict): AWS Cloudwatch Scheduled Event
context (object): AWS running context
Returns:
None
"""
es = ES_Cleanup(event, context)
# Index cutoff definition, remove older than this date
earliest_to_keep = datetime.date.today() - datetime.timedelta(
days=int(es.cfg["delete_after"]))
for index in es.get_indices():
if index["index"] == ".kibana":
# ignore .kibana index
continue
# Build the Elasticsearch client.
es = Elasticsearch(
hosts = [{'host': host, 'port': 443}],
http_auth = awsauth,
use_ssl = True,
verify_certs = True,
connection_class = RequestsHttpConnection
)
index_list = curator.IndexList(es)
idx_name = '-'.join(word for word in index["index"].split("-")[:-1])
idx_date = index["index"].split("-")[-1]
# Filters by age, anything with a time stamp older than 30 days in the index name.
index_list.filter_by_age(source='name', direction='older', timestring='%Y.%m.%d', unit='days', unit_count=28)
if idx_name in es.cfg["index"] or "all" in es.cfg["index"]:
# Filters by naming prefix.
# index_list.filter_by_regex(kind='prefix', value='my-logs-2017')
if idx_date <= earliest_to_keep.strftime(es.cfg["index_format"]):
print("Deleting index: %s" % index["index"])
es.delete_index(index["index"])
# Filters by age, anything created more than one month ago.
# index_list.filter_by_age(source='creation_date', direction='older', unit='months', unit_count=1)
print("Found %s indices to delete" % len(index_list.indices))
if __name__ == '__main__':
event = {
'account': '123456789012',
'region': 'eu-west-1',
'detail': {},
'detail-type': 'Scheduled Event',
'source': 'aws.events',
'time': '1970-01-01T00:00:00Z',
'id': 'cdc73f9d-aea9-11e3-9d5a-835b769c0d9c',
'resources':
['arn:aws:events:us-east-1:123456789012:rule/my-schedule']
}
lambda_handler(event, "")
# If our filtered list contains any indices, delete them.
if index_list.indices:
curator.DeleteIndices(index_list).do_action()
\ No newline at end of file
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This AWS Lambda function allowed to delete the old Elasticsearch index
"""
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.credentials import create_credential_resolver
from botocore.session import get_session
from botocore.vendored.requests import Session
import sys
if sys.version_info[0] == 3:
from urllib.request import quote
else:
from urllib import quote
import datetime
import json
import time
import os
class ES_Exception(Exception):
"""Exception capturing status_code from Client Request"""
status_code = 0
payload = ""
def __init__(self, status_code, payload):
self.status_code = status_code
self.payload = payload
Exception.__init__(self,
"ES_Exception: status_code={}, payload={}".format(
status_code, payload))
class ES_Cleanup(object):
name = "lambda_es_cleanup"
def __init__(self, event, context):
"""Main Class init
Args:
event (dict): AWS Cloudwatch Scheduled Event
context (object): AWS running context
"""
self.report = []
self.event = event
self.context = context
self.cfg = {}
self.cfg["es_endpoint"] = os.environ.get("es_endpoint", None)
self.cfg["index"] = os.environ.get("index", "all").split(",")
self.cfg["delete_after"] = int(os.environ.get("delete_after", 15))
self.cfg["es_max_retry"] = int(os.environ.get("es_max_retry", 3))
self.cfg["index_format"] = os.environ.get("index_format", "%Y.%m.%d")
self.cfg["sns_alert"] = os.environ.get("sns_alert", "")
if not self.cfg["es_endpoint"]:
raise Exception("[es_endpoint] OS variable is not set")
def send_to_es(self, path, method="GET", payload={}):
"""Low-level POST data to Amazon Elasticsearch Service generating a Sigv4 signed request
Args:
path (str): path to send to ES
method (str, optional): HTTP method default:GET
payload (dict, optional): additional payload used during POST or PUT
Returns:
dict: json answer converted in dict
Raises:
#: Error during ES communication
ES_Exception: Description
"""
if not path.startswith("/"):
path = "/" + path
es_region = self.cfg["es_endpoint"].split(".")[1]
# send to ES with exponential backoff
retries = 0
while retries < int(self.cfg["es_max_retry"]):
if retries > 0:
seconds = (2**retries) * .1
# print('Waiting for %.1f seconds', seconds)
time.sleep(seconds)
req = AWSRequest(
method=method,
url="https://%s%s?pretty&format=json" % (self.cfg["es_endpoint"], quote(path)),
data=payload,
headers={'Host': self.cfg["es_endpoint"]})
credential_resolver = create_credential_resolver(get_session())
credentials = credential_resolver.load_credentials()
SigV4Auth(credentials, 'es', es_region).add_auth(req)
try:
preq = req.prepare()
session = Session()
res = session.send(preq)
if res.status_code >= 200 and res.status_code <= 299:
# print("%s %s" % (res.status_code, res.content))
return json.loads(res.content)
else:
raise ES_Exception(res.status_code, res._content)
except ES_Exception as e:
if (e.status_code >= 500) and (e.status_code <= 599):
retries += 1 # Candidate for retry
else:
raise # Stop retrying, re-raise exception
def send_error(self, msg):
"""Send SNS error
Args:
msg (str): error string
Returns:
None
"""
_msg = "[%s][%s] %s" % (self.name, self.cur_account, msg)
print(_msg)
if self.cfg["sns_alert"] != "":
sns_region = self.cfg["sns_alert"].split(":")[4]
sns = boto3.client("sns", region_name=sns_region)
response = sns.publish(
TopicArn=self.cfg["sns_alert"], Message=_msg)
def delete_index(self, index_name):
"""ES DELETE specific index
Args:
index_name (str): Index name
Returns:
dict: ES answer
"""
return self.send_to_es(index_name, "DELETE")
def get_indices(self):
"""ES Get indices
Returns:
dict: ES answer
"""
return self.send_to_es("/_cat/indices")
def lambda_handler(event, context):
"""Main Lambda function
Args:
event (dict): AWS Cloudwatch Scheduled Event
context (object): AWS running context
Returns:
None
"""
es = ES_Cleanup(event, context)
# Index cutoff definition, remove older than this date
earliest_to_keep = datetime.date.today() - datetime.timedelta(
days=int(es.cfg["delete_after"]))
for index in es.get_indices():
if index["index"] == ".kibana":
# ignore .kibana index
continue
idx_name = '-'.join(word for word in index["index"].split("-")[:-1])
idx_date = index["index"].split("-")[-1]
if idx_name in es.cfg["index"] or "all" in es.cfg["index"]:
if idx_date <= earliest_to_keep.strftime(es.cfg["index_format"]):
print("Deleting index: %s" % index["index"])
es.delete_index(index["index"])
if __name__ == '__main__':
event = {
'account': '123456789012',
'region': 'eu-west-1',
'detail': {},
'detail-type': 'Scheduled Event',
'source': 'aws.events',
'time': '1970-01-01T00:00:00Z',
'id': 'cdc73f9d-aea9-11e3-9d5a-835b769c0d9c',
'resources':
['arn:aws:events:us-east-1:123456789012:rule/my-schedule']
}
lambda_handler(event, "")
Release History
---------------
0.9 (2016-02-07)
++++++++++++++++
**New features**
- Support for STS temporary credentials. Thanks to https://github.com/magdalene
**Tests**
- Tests for the STS temporary credentials functionality
- Fixed AWS4SigningKey.amz_date deprecation warning test
- Elastic MapReduce live service test no longer using deprecated
DescribeJobFlows action
0.8 (2015-12-31)
++++++++++++++++
This version introduces some behaviour changes designed to reduce the legwork
needed when a signing key goes out of date. This has implications for
multithreading and secret key storage. See the README for further details.
**New features**
- AWS4Auth class now checks request header date against signing key scope
date, and automatically regenerates the signing key with the request
date if they don't match
- Added exceptions module with new exceptions: RequestsAWS4AuthException,
DateMismatchError, NoSecretKeyError, DateFormatError
- Added StrictAWS4Auth and PassiveAWS4Auth classes
**AWS4Auth changes**
- Added regenerate_signing_key() method, to allow regeneration of
current signing key with parameter overrides
- Added methods for checking and extracting dates from requests:
get_request_date(), parse_date(), handle_date_mismatch()
- __call__() now checks for a date header in the request and attempts
to automatically regenerate the signing key with the request date if
request date differs from the signing key date
- Can now supply a date to the constructor
- Changed default included sig headers to include 'Date' header if
present
**AWS4SigningKey changes**
- Added new store_secret_key instantiation parameter which allows
control of whether the secret key is stored in the instance
- Deprecated the amz_date property in favour of just 'date'
- Spelling typo fix in AWS4AuthSigningKey module docstring. Thanks
to jhgorrell
**Package changes**
- Dropped support for Python 3.2. Now only supported on Python 2.7 and 3.3 and
up, to match versions supported by Requests.
**Tests**
- Many new tests for the new functionality
- Added tests for generating canonical path, including test for fix
added in 0.7 for percent encoding of paths
- Added tests for generating canonical querystrings
0.7 (2015-11-02)
++++++++++++++++
**Bugfixes**
- Fixed percent encoded characters in URL paths not being encoded again
for signature generation, as is expected for all services except S3.
This was causing authentication failures whenever these characters
appeared in a URL. Thanks to ipartola and cristi23 for the report.
- Two bugfixes for ElasticSearch, thanks to Matthew Thompson for both:
* No longer setting body to b'' during signing if it's None
* Now stripping port from URL netloc for signature generation
**Modules**
- Upgraded the included version of six.py to 1.10
**Tests**
- Fixed a couple of broken Unicode tests on Python 2
- Added a couple more tests for encoding Unicode request bodies
0.6 (2015-09-07)
++++++++++++++++
**Bugfixes**
- Included HISTORY.rst in built package to fix pip source install failure.
Thanks to Beirdo for the bug report.
0.5 (2015-04-29)
++++++++++++++++
**Bugfixes**
- Fixed bug when uploading to S3 with x-amz-acl header which caused
authentication failure - headers used in signature are now: host,
content-type and all x-amz-* headers (except for x-amz-client-context which
breaks Mobile Analytics auth if included)
**Docs**
- Minor docstring and comment updates
**License**
- Changed content of LICENSE to vanilla MIT license
The MIT License (MIT)
Copyright (c) 2015 Sam Washington
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
requests-aws4auth includes the six library.
six License
===========
This is the MIT license: http://www.opensource.org/licenses/mit-license.php
Copyright (c) 2010-2015 Benjamin Peterson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
requests-aws4auth
=================
.. image:: https://img.shields.io/pypi/v/requests-aws4auth.svg
:target: https://pypi.python.org/pypi/requests-aws4auth
.. image:: https://img.shields.io/pypi/l/requests-aws4auth.svg
:target: https://pypi.python.org/pypi/requests-aws4auth
Amazon Web Services version 4 authentication for the Python `Requests`_
library.
.. _Requests: https://github.com/kennethreitz/requests
Features
--------
* Requests authentication for all AWS services that support AWS auth v4
* Independent signing key objects
* Automatic regeneration of keys when scope date boundary is passed
* Support for STS temporary credentials
Implements header-based authentication, GET URL parameter and POST parameter
authentication are not supported.
Supported Services
------------------
This package has been tested as working against:
AppStream, Auto-Scaling, CloudFormation, CloudFront, CloudHSM, CloudSearch,
CloudTrail, CloudWatch Monitoring, CloudWatch Logs, CodeDeploy, Cognito
Identity, Cognito Sync, Config, DataPipeline, Direct Connect, DynamoDB, Elastic
Beanstalk, ElastiCache, EC2, EC2 Container Service, Elastic Load Balancing,
Elastic MapReduce, ElasticSearch, Elastic Transcoder, Glacier, Identity and
Access Management (IAM), Key Management Service (KMS), Kinesis, Lambda,
Opsworks, Redshift, Relational Database Service (RDS), Route 53, Simple Storage
Service (S3), Simple Notification Service (SNS), Simple Queue Service (SQS),
Storage Gateway, Security Token Service (STS)
The following services do not support AWS auth version 4 and are not usable
with this package:
Simple Email Service (SES), Simple Workflow Service (SWF), Import/Export,
SimpleDB, DevPay, Mechanical Turk
The AWS Support API has not been tested as it requires a premium subscription.
Installation
------------
Install via pip:
.. code-block:: bash
$ pip install requests-aws4auth
requests-aws4auth requires the `Requests`_ library by Kenneth Reitz.
requests-aws4auth is tested on Python 2.7 and 3.3 and up.
Behaviour changes in 0.8
------------------------
Version 0.8 introduces request date checking and automatic key regeneration
behaviour as default. This has implications for sharing authentication objects
between threads, and for storage of secret keys. See the relevant sections
below for details. See also the discussion in `GitHub issue #10`_.
.. _GitHub issue #10: https://github.com/sam-washington/requests-aws4auth/issues/10
Basic usage
-----------
.. code-block:: python
>>> import requests
>>> from requests_aws4auth import AWS4Auth
>>> endpoint = 'http://s3-eu-west-1.amazonaws.com'
>>> auth = AWS4Auth('<ACCESS ID>', '<ACCESS KEY>', 'eu-west-1', 's3')
>>> response = requests.get(endpoint, auth=auth)
>>> response.text
<?xml version="1.0" encoding="UTF-8"?>
<ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01">
<Owner>
<ID>bcaf1ffd86f461ca5fb16fd081034f</ID>
<DisplayName>webfile</DisplayName>
...
This example would list your buckets in the ``eu-west-1`` region of the Amazon
S3 service.
STS Temporary Credentials
-------------------------
.. code-block:: python
>>> from requests_aws4auth import AWS4Auth
>>> auth = AWS4Auth('<ACCESS ID>', '<ACCESS KEY>', 'eu-west-1', 's3',
session_token='<SESSION TOKEN>')
...
This example shows how to construct an AWS4Auth object for use with STS
temporary credentials. The ``x-amz-security-token`` header is added with
the session token. Temporary credential timeouts are not managed -- in
case the temporary credentials expire, they need to be re-generated and
the AWS4Auth object re-constructed with the new credentials.
Date handling
-------------
If an HTTP request to be authenticated contains a ``Date`` or ``X-Amz-Date``
header, AWS will only accept the authorised request if the date in the header
matches the scope date of the signing key (see the `AWS REST API date docs`_).
.. _AWS REST API date docs: http://docs.aws.amazon.com/general/latest/gr/sigv4-date-handling.html).
From version 0.8 of requests-aws4auth, if the header date does not match the
scope date, an ``AWS4Auth`` instance will automatically regenerate its signing
key, using the same scope parameters as the previous key except for the date,
which will be changed to match the request date. If a request does not include
a date, the current date is added to the request in an ``X-Amz-Date`` header,
and the signing key is regenerated if this differs from the scope date.
This means that ``AWS4Auth`` now extracts and parses dates from the values of
``X-Amz-Date`` and ``Date`` headers. Supported date formats are:
* RFC 7231 (e.g. Mon, 09 Sep 2011 23:36:00 GMT)
* RFC 850 (e.g. Sunday, 06-Nov-94 08:49:37 GMT)
* C time (e.g. Wed Dec 4 00:00:00 2002)
* Amz-Date format (e.g. 20090325T010101Z)
* ISO 8601 / RFC 3339 (e.g. 2009-03-25T10:11:12.13-01:00)
If either header is present but ``AWS4Auth`` cannot extract a date because all
present date headers are in an unrecognisable format, ``AWS4Auth`` will delete
any ``X-Amz-Date`` and ``Date`` headers present and replace with a single
``X-Amz-Date`` header containing the current date. This behaviour can be
modified using the ``raise_invalid_date`` keyword argument of the ``AWS4Auth``
constructor.
Automatic key regeneration
--------------------------
If you do not want the signing key to be automatically regenerated when a
mismatch between the request date and the scope date is encountered, use the
alternative ``StrictAWS4Auth`` class, which is identical to ``AWS4Auth`` except
that upon encountering a date mismatch it just raises a ``DateMismatchError``.
You can also use the ``PassiveAWS4Auth`` class, which mimics the ``AWS4Auth``
behaviour prior to version 0.8 and just signs and sends the request, whether
the date matches or not. In this case it is up to the calling code to handle an
authentication failure response from AWS caused by the date mismatch.
Secret key storage
------------------
To allow automatic key regeneration, the secret key is stored in the
``AWS4Auth`` instance, in the signing key object. If you do not want this to
occur, instantiate the instance using an ``AWS4Signing`` key which was created
with the store_secret_key parameter set to False:
.. code-block:: python
>>> sig_key = AWS4SigningKey(secret_key, region, service, date, False)
>>> auth = StrictAWS4Auth(access_id, sig_key)
The ``AWS4Auth`` class will then raise a ``NoSecretKeyError`` when it attempts
to regenerate its key. A slightly more conceptually elegant way to handle this
is to use the alternative ``StrictAWS4Auth`` class, again instantiating it with
an ``AWS4SigningKey`` instance created with ``store_secret_key = False``.
Multithreading
--------------
If you share ``AWS4Auth`` (or even ``StrictAWS4Auth``) instances between
threads you are likely to encounter problems. Because ``AWS4Auth`` instances
may unpredictably regenerate their signing key as part of signing a request,
threads using the same instance may find the key changed by another thread
halfway through the signing process, which may result in undefined behaviour.
It may be possible to rig up a workable instance sharing mechanism using
locking primitives and the ``StrictAWS4Auth`` class, however this poor author
can't think of a scenario which works safely yet doesn't suffer from at some
point blocking all threads for at least the duration of an HTTP request, which
could be several seconds. If several requests come in in close succession which
all require key regenerations then the system could be forced into serial
operation for quite a length of time.
In short, it's probably best to create a thread-local instance of ``AWS4Auth``
for each thread that needs to do authentication.
API reference
-------------
See the doctrings in ``aws4auth.py`` and ``aws4signingkey.py``.
Testing
-------
A test suite is included in the test folder.
The package passes all tests in the AWS auth v4 `test_suite`_, and contains
tests against the supported live services. See docstrings in
``test/requests_aws4auth_test.py`` for details about running the tests.
Connection parameters are included in the tests for the AWS Support API, should
you have access and want to try it. The documentation says it supports auth v4
so it should work if you have a subscription. Do pass on your results!
.. _test_suite: http://docs.aws.amazon.com/general/latest/gr/signature-v4-test-suite.html
Unsupported AWS features / todo
-------------------------------
* Currently does not support Amazon S3 chunked uploads
* Tests for new AWS services
* Requires Requests library to be present even if only using AWS4SigningKey
* Coherent documentation
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from chardet.cli.chardetect import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(main())
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::
$ pip install certifi
Usage
-----
To reference the installed certificate authority (CA) bundle, you can use the
built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Or from the command line::
$ python -m certifi
/usr/local/lib/python2.7/site-packages/certifi/cacert.pem
Enjoy!
1024-bit Root Certificates
~~~~~~~~~~~~~~~~~~~~~~~~~~
Browsers and certificate authorities have concluded that 1024-bit keys are
unacceptably weak for certificates, particularly root certificates. For this
reason, Mozilla has removed any weak (i.e. 1024-bit key) certificate from its
bundle, replacing it with an equivalent strong (i.e. 2048-bit or greater key)
certificate from the same CA. Because Mozilla removed these certificates from
its bundle, ``certifi`` removed them as well.
In previous versions, ``certifi`` provided the ``certifi.old_where()`` function
to intentionally re-add the 1024-bit roots back into your bundle. This was not
recommended in production and therefore was removed at the end of 2018.
.. _`Certifi`: https://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/
Metadata-Version: 2.0
Name: certifi
Version: 2019.9.11
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://certifi.io/
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::
$ pip install certifi
Usage
-----
To reference the installed certificate authority (CA) bundle, you can use the
built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Or from the command line::
$ python -m certifi
/usr/local/lib/python2.7/site-packages/certifi/cacert.pem
Enjoy!
1024-bit Root Certificates
~~~~~~~~~~~~~~~~~~~~~~~~~~
Browsers and certificate authorities have concluded that 1024-bit keys are
unacceptably weak for certificates, particularly root certificates. For this
reason, Mozilla has removed any weak (i.e. 1024-bit key) certificate from its
bundle, replacing it with an equivalent strong (i.e. 2048-bit or greater key)
certificate from the same CA. Because Mozilla removed these certificates from
its bundle, ``certifi`` removed them as well.
In previous versions, ``certifi`` provided the ``certifi.old_where()`` function
to intentionally re-add the 1024-bit roots back into your bundle. This was not
recommended in production and therefore was removed at the end of 2018.
.. _`Certifi`: https://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/
certifi-2019.9.11.dist-info/DESCRIPTION.rst,sha256=aLNHONztn2ZiBpSTivVFy6EDIWmuNYSsEQwx4NWbvB4,1580
certifi-2019.9.11.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2019.9.11.dist-info/METADATA,sha256=M0Gen7rhgJKIvggZuENXqcZexg74gXdjGC679bMeoDw,2522
certifi-2019.9.11.dist-info/RECORD,,
certifi-2019.9.11.dist-info/WHEEL,sha256=5wvfB7GvgZAbKBSE9uX9Zbi6LCL-_KgezgHblXhCRnM,113
certifi-2019.9.11.dist-info/metadata.json,sha256=NppG2TtVr6va5nwyG9pxGhktdvudS-IfpTkaQaWKlBE,1022
certifi-2019.9.11.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=WFoavXHhpX-BZ5kbvyinZTbhLsqPJypLKIZu29nUsQg,52
certifi/__init__.pyc,,
certifi/__main__.py,sha256=FiOYt1Fltst7wk9DRa6GCoBr8qBUxlNQu_MKJf04E6s,41
certifi/__main__.pyc,,
certifi/cacert.pem,sha256=cVC1b0T-OcQzgdcRql2yMxT7O08O6pcJHnuO9nbLLn0,278533
certifi/core.py,sha256=EuFc2BsToG5O1-qsx4BSjQ1r1-7WRtH87b1WflZOWhI,218
certifi/core.pyc,,
Wheel-Version: 1.0
Generator: bdist_wheel (0.30.0.a0)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any
{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7"], "extensions": {"python.details": {"contacts": [{"email": "me@kennethreitz.com", "name": "Kenneth Reitz", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://certifi.io/"}}}, "generator": "bdist_wheel (0.30.0.a0)", "license": "MPL-2.0", "metadata_version": "2.0", "name": "certifi", "summary": "Python package for providing Mozilla's CA Bundle.", "version": "2019.9.11"}
\ No newline at end of file
from .core import where
__version__ = "2019.09.11"
from certifi import where
print(where())
This diff is collapsed.
# -*- coding: utf-8 -*-
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem.
"""
import os
def where():
f = os.path.dirname(__file__)
return os.path.join(f, 'cacert.pem')
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
.. image:: https://img.shields.io/travis/chardet/chardet/stable.svg
:alt: Build status
:target: https://travis-ci.org/chardet/chardet
.. image:: https://img.shields.io/coveralls/chardet/chardet/stable.svg
:target: https://coveralls.io/r/chardet/chardet
.. image:: https://img.shields.io/pypi/v/chardet.svg
:target: https://warehouse.python.org/project/chardet/
:alt: Latest version on PyPI
.. image:: https://img.shields.io/pypi/l/chardet.svg
:alt: License
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-5, windows-1251 (Bulgarian)
- ISO-8859-1, windows-1252 (Western European languages)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
.. note::
Our ISO-8859-2 and windows-1250 (Hungarian) probers have been temporarily
disabled until we can retrain the models.
Requires Python 2.6, 2.7, or 3.3+.
Installation
------------
Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
pip install chardet
Documentation
-------------
For users, docs are now available at https://chardet.readthedocs.io/.
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard
Metadata-Version: 2.0
Name: chardet
Version: 3.0.4
Summary: Universal encoding detector for Python 2 and 3
Home-page: https://github.com/chardet/chardet
Author: Daniel Blanchard
Author-email: dan.blanchard@gmail.com
License: LGPL
Keywords: encoding,i18n,xml
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Linguistic
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
.. image:: https://img.shields.io/travis/chardet/chardet/stable.svg
:alt: Build status
:target: https://travis-ci.org/chardet/chardet
.. image:: https://img.shields.io/coveralls/chardet/chardet/stable.svg
:target: https://coveralls.io/r/chardet/chardet
.. image:: https://img.shields.io/pypi/v/chardet.svg
:target: https://warehouse.python.org/project/chardet/
:alt: Latest version on PyPI
.. image:: https://img.shields.io/pypi/l/chardet.svg
:alt: License
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-5, windows-1251 (Bulgarian)
- ISO-8859-1, windows-1252 (Western European languages)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
.. note::
Our ISO-8859-2 and windows-1250 (Hungarian) probers have been temporarily
disabled until we can retrain the models.
Requires Python 2.6, 2.7, or 3.3+.
Installation
------------
Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
pip install chardet
Documentation
-------------
For users, docs are now available at https://chardet.readthedocs.io/.
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard
../../bin/chardetect,sha256=utGHWq2L-B8QhbE4EqSt8m5LXPfYjDHE4CYkTIP4t0c,223
chardet-3.0.4.dist-info/DESCRIPTION.rst,sha256=PQ4sBsMyKFZkjC6QpmbpLn0UtCNyeb-ZqvCGEgyZMGk,2174
chardet-3.0.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
chardet-3.0.4.dist-info/METADATA,sha256=RV_2I4B1Z586DL8oVO5Kp7X5bUdQ5EuKAvNoAEF8wSw,3239
chardet-3.0.4.dist-info/RECORD,,
chardet-3.0.4.dist-info/WHEEL,sha256=o2k-Qa-RMNIJmUdIc7KU6VWR_ErNRbWNlxDIpl7lm34,110
chardet-3.0.4.dist-info/entry_points.txt,sha256=fAMmhu5eJ-zAJ-smfqQwRClQ3-nozOCmvJ6-E8lgGJo,60
chardet-3.0.4.dist-info/metadata.json,sha256=0htbRM18ujyGZDdfowgAqj6Hq2eQtwzwyhaEveKntgo,1375
chardet-3.0.4.dist-info/top_level.txt,sha256=AowzBbZy4x8EirABDdJSLJZMkJ_53iIag8xfKR6D7kI,8
chardet/__init__.py,sha256=YsP5wQlsHJ2auF1RZJfypiSrCA7_bQiRm3ES_NI76-Y,1559
chardet/__init__.pyc,,
chardet/big5freq.py,sha256=D_zK5GyzoVsRes0HkLJziltFQX0bKCLOrFe9_xDvO_8,31254
chardet/big5freq.pyc,,
chardet/big5prober.py,sha256=kBxHbdetBpPe7xrlb-e990iot64g_eGSLd32lB7_h3M,1757
chardet/big5prober.pyc,,
chardet/chardistribution.py,sha256=3woWS62KrGooKyqz4zQSnjFbJpa6V7g02daAibTwcl8,9411
chardet/chardistribution.pyc,,
chardet/charsetgroupprober.py,sha256=6bDu8YIiRuScX4ca9Igb0U69TA2PGXXDej6Cc4_9kO4,3787
chardet/charsetgroupprober.pyc,,
chardet/charsetprober.py,sha256=KSmwJErjypyj0bRZmC5F5eM7c8YQgLYIjZXintZNstg,5110
chardet/charsetprober.pyc,,
chardet/cli/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
chardet/cli/__init__.pyc,,
chardet/cli/chardetect.py,sha256=YBO8L4mXo0WR6_-Fjh_8QxPBoEBNqB9oNxNrdc54AQs,2738
chardet/cli/chardetect.pyc,,
chardet/codingstatemachine.py,sha256=VYp_6cyyki5sHgXDSZnXW4q1oelHc3cu9AyQTX7uug8,3590
chardet/codingstatemachine.pyc,,
chardet/compat.py,sha256=PKTzHkSbtbHDqS9PyujMbX74q1a8mMpeQTDVsQhZMRw,1134
chardet/compat.pyc,,
chardet/cp949prober.py,sha256=TZ434QX8zzBsnUvL_8wm4AQVTZ2ZkqEEQL_lNw9f9ow,1855
chardet/cp949prober.pyc,,
chardet/enums.py,sha256=Aimwdb9as1dJKZaFNUH2OhWIVBVd6ZkJJ_WK5sNY8cU,1661
chardet/enums.pyc,,
chardet/escprober.py,sha256=kkyqVg1Yw3DIOAMJ2bdlyQgUFQhuHAW8dUGskToNWSc,3950
chardet/escprober.pyc,,
chardet/escsm.py,sha256=RuXlgNvTIDarndvllNCk5WZBIpdCxQ0kcd9EAuxUh84,10510
chardet/escsm.pyc,,
chardet/eucjpprober.py,sha256=iD8Jdp0ISRjgjiVN7f0e8xGeQJ5GM2oeZ1dA8nbSeUw,3749
chardet/eucjpprober.pyc,,
chardet/euckrfreq.py,sha256=-7GdmvgWez4-eO4SuXpa7tBiDi5vRXQ8WvdFAzVaSfo,13546
chardet/euckrfreq.pyc,,
chardet/euckrprober.py,sha256=MqFMTQXxW4HbzIpZ9lKDHB3GN8SP4yiHenTmf8g_PxY,1748
chardet/euckrprober.pyc,,
chardet/euctwfreq.py,sha256=No1WyduFOgB5VITUA7PLyC5oJRNzRyMbBxaKI1l16MA,31621
chardet/euctwfreq.pyc,,
chardet/euctwprober.py,sha256=13p6EP4yRaxqnP4iHtxHOJ6R2zxHq1_m8hTRjzVZ95c,1747
chardet/euctwprober.pyc,,
chardet/gb2312freq.py,sha256=JX8lsweKLmnCwmk8UHEQsLgkr_rP_kEbvivC4qPOrlc,20715
chardet/gb2312freq.pyc,,
chardet/gb2312prober.py,sha256=gGvIWi9WhDjE-xQXHvNIyrnLvEbMAYgyUSZ65HUfylw,1754
chardet/gb2312prober.pyc,,
chardet/hebrewprober.py,sha256=c3SZ-K7hvyzGY6JRAZxJgwJ_sUS9k0WYkvMY00YBYFo,13838
chardet/hebrewprober.pyc,,
chardet/jisfreq.py,sha256=vpmJv2Bu0J8gnMVRPHMFefTRvo_ha1mryLig8CBwgOg,25777
chardet/jisfreq.pyc,,
chardet/jpcntx.py,sha256=PYlNqRUQT8LM3cT5FmHGP0iiscFlTWED92MALvBungo,19643
chardet/jpcntx.pyc,,
chardet/langbulgarianmodel.py,sha256=1HqQS9Pbtnj1xQgxitJMvw8X6kKr5OockNCZWfEQrPE,12839
chardet/langbulgarianmodel.pyc,,
chardet/langcyrillicmodel.py,sha256=LODajvsetH87yYDDQKA2CULXUH87tI223dhfjh9Zx9c,17948
chardet/langcyrillicmodel.pyc,,
chardet/langgreekmodel.py,sha256=8YAW7bU8YwSJap0kIJSbPMw1BEqzGjWzqcqf0WgUKAA,12688
chardet/langgreekmodel.pyc,,
chardet/langhebrewmodel.py,sha256=JSnqmE5E62tDLTPTvLpQsg5gOMO4PbdWRvV7Avkc0HA,11345
chardet/langhebrewmodel.pyc,,
chardet/langhungarianmodel.py,sha256=RhapYSG5l0ZaO-VV4Fan5sW0WRGQqhwBM61yx3yxyOA,12592
chardet/langhungarianmodel.pyc,,
chardet/langthaimodel.py,sha256=8l0173Gu_W6G8mxmQOTEF4ls2YdE7FxWf3QkSxEGXJQ,11290
chardet/langthaimodel.pyc,,
chardet/langturkishmodel.py,sha256=W22eRNJsqI6uWAfwXSKVWWnCerYqrI8dZQTm_M0lRFk,11102
chardet/langturkishmodel.pyc,,
chardet/latin1prober.py,sha256=S2IoORhFk39FEFOlSFWtgVybRiP6h7BlLldHVclNkU8,5370
chardet/latin1prober.pyc,,
chardet/mbcharsetprober.py,sha256=AR95eFH9vuqSfvLQZN-L5ijea25NOBCoXqw8s5O9xLQ,3413
chardet/mbcharsetprober.pyc,,
chardet/mbcsgroupprober.py,sha256=h6TRnnYq2OxG1WdD5JOyxcdVpn7dG0q-vB8nWr5mbh4,2012
chardet/mbcsgroupprober.pyc,,
chardet/mbcssm.py,sha256=SY32wVIF3HzcjY3BaEspy9metbNSKxIIB0RKPn7tjpI,25481
chardet/mbcssm.pyc,,
chardet/sbcharsetprober.py,sha256=LDSpCldDCFlYwUkGkwD2oFxLlPWIWXT09akH_2PiY74,5657
chardet/sbcharsetprober.pyc,,
chardet/sbcsgroupprober.py,sha256=1IprcCB_k1qfmnxGC6MBbxELlKqD3scW6S8YIwdeyXA,3546
chardet/sbcsgroupprober.pyc,,
chardet/sjisprober.py,sha256=IIt-lZj0WJqK4rmUZzKZP4GJlE8KUEtFYVuY96ek5MQ,3774
chardet/sjisprober.pyc,,
chardet/universaldetector.py,sha256=qL0174lSZE442eB21nnktT9_VcAye07laFWUeUrjttY,12485
chardet/universaldetector.pyc,,
chardet/utf8prober.py,sha256=IdD8v3zWOsB8OLiyPi-y_fqwipRFxV9Nc1eKBLSuIEw,2766
chardet/utf8prober.pyc,,
chardet/version.py,sha256=sp3B08mrDXB-pf3K9fqJ_zeDHOCLC8RrngQyDFap_7g,242
chardet/version.pyc,,
Wheel-Version: 1.0
Generator: bdist_wheel (0.29.0)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any
[console_scripts]
chardetect = chardet.cli.chardetect:main
{"classifiers": ["Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic"], "extensions": {"python.commands": {"wrap_console": {"chardetect": "chardet.cli.chardetect:main"}}, "python.details": {"contacts": [{"email": "dan.blanchard@gmail.com", "name": "Daniel Blanchard", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://github.com/chardet/chardet"}}, "python.exports": {"console_scripts": {"chardetect": "chardet.cli.chardetect:main"}}}, "generator": "bdist_wheel (0.29.0)", "keywords": ["encoding", "i18n", "xml"], "license": "LGPL", "metadata_version": "2.0", "name": "chardet", "summary": "Universal encoding detector for Python 2 and 3", "test_requires": [{"requires": ["hypothesis", "pytest"]}], "version": "3.0.4"}
\ No newline at end of file
######################## BEGIN LICENSE BLOCK ########################
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .compat import PY2, PY3
from .universaldetector import UniversalDetector
from .version import __version__, VERSION
def detect(byte_str):
"""
Detect the encoding of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
raise TypeError('Expected object of type bytes or bytearray, got: '
'{0}'.format(type(byte_str)))
else:
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
return detector.close()
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
super(Big5Prober, self).__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "Big5"
@property
def language(self):
return "Chinese"
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
GB2312_TYPICAL_DISTRIBUTION_RATIO)
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
BIG5_TYPICAL_DISTRIBUTION_RATIO)
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
JIS_TYPICAL_DISTRIBUTION_RATIO)
class CharDistributionAnalysis(object):
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
def __init__(self):
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._char_to_freq_order = None
self._table_size = None # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self.typical_distribution_ratio = None
self._done = None
self._total_chars = None
self._freq_chars = None
self.reset()
def reset(self):
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
self._done = False
self._total_chars = 0 # Total characters encountered
# The number of characters whose frequency order is less than 512
self._freq_chars = 0
def feed(self, char, char_len):
"""feed a character with known length"""
if char_len == 2:
# we only care about 2-bytes character in our distribution analysis
order = self.get_order(char)
else:
order = -1
if order >= 0:
self._total_chars += 1
# order is valid
if order < self._table_size:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
def get_confidence(self):
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
return self.SURE_NO
if self._total_chars != self._freq_chars:
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
* self.typical_distribution_ratio))
if r < self.SURE_YES:
return r
# normalize confidence (we don't want to be 100% sure)
return self.SURE_YES
def got_enough_data(self):
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
def get_order(self, byte_str):
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
# table.
return -1
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCTWDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = byte_str[0]
if first_char >= 0xC4:
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
else:
return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCKRDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = byte_str[0]
if first_char >= 0xB0:
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
else:
return -1
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(GB2312DistributionAnalysis, self).__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0xB0) and (second_char >= 0xA1):
return 94 * (first_char - 0xB0) + second_char - 0xA1
else:
return -1
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(Big5DistributionAnalysis, self).__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if first_char >= 0xA4:
if second_char >= 0xA1:
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
else:
return 157 * (first_char - 0xA4) + second_char - 0x40
else:
return -1
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(SJISDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0x81) and (first_char <= 0x9F):
order = 188 * (first_char - 0x81)
elif (first_char >= 0xE0) and (first_char <= 0xEF):
order = 188 * (first_char - 0xE0 + 31)
else:
return -1
order = order + second_char - 0x40
if second_char > 0x7F:
order = -1
return order
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCJPDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
char = byte_str[0]
if char >= 0xA0:
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
else:
return -1
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import ProbingState
from .charsetprober import CharSetProber
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
def reset(self):
super(CharSetGroupProber, self).reset()
self._active_num = 0
for prober in self.probers:
if prober:
prober.reset()
prober.active = True
self._active_num += 1
self._best_guess_prober = None
@property
def charset_name(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.charset_name
@property
def language(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
def feed(self, byte_str):
for prober in self.probers:
if not prober:
continue
if not prober.active:
continue
state = prober.feed(byte_str)
if not state:
continue
if state == ProbingState.FOUND_IT:
self._best_guess_prober = prober
return self.state
elif state == ProbingState.NOT_ME:
prober.active = False
self._active_num -= 1
if self._active_num <= 0:
self._state = ProbingState.NOT_ME
return self.state
return self.state
def get_confidence(self):
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
elif state == ProbingState.NOT_ME:
return 0.01
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
if not prober:
continue
if not prober.active:
self.logger.debug('%s not active', prober.charset_name)
continue
conf = prober.get_confidence()
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
if best_conf < conf:
best_conf = conf
self._best_guess_prober = prober
if not self._best_guess_prober:
return 0.0
return best_conf
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
import re
from .enums import ProbingState
class CharSetProber(object):
SHORTCUT_THRESHOLD = 0.95
def __init__(self, lang_filter=None):
self._state = None
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
def reset(self):
self._state = ProbingState.DETECTING
@property
def charset_name(self):
return None
def feed(self, buf):
pass
@property
def state(self):
return self._state
def get_confidence(self):
return 0.0
@staticmethod
def filter_high_byte_only(buf):
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
return buf
@staticmethod
def filter_international_words(buf):
"""
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
international: international characters [\x80-\xFF]
marker: everything else [^a-zA-Z\x80-\xFF]
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
This filter applies to all scripts which do not use English characters.
"""
filtered = bytearray()
# This regex expression filters out only words that have at-least one
# international character. The word may include one marker character at
# the end.
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
buf)
for word in words:
filtered.extend(word[:-1])
# If the last character in the word is a marker, replace it with a
# space as markers shouldn't affect our analysis (they are used
# similarly across all languages and may thus have similar
# frequencies).
last_char = word[-1:]
if not last_char.isalpha() and last_char < b'\x80':
last_char = b' '
filtered.extend(last_char)
return filtered
@staticmethod
def filter_with_english_letters(buf):
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
Also retains English alphabet and high byte characters immediately
before occurrences of >.
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
"""
filtered = bytearray()
in_tag = False
prev = 0
for curr in range(len(buf)):
# Slice here to get bytes instead of an int with Python 3
buf_char = buf[curr:curr + 1]
# Check if we're coming out of or entering an HTML tag
if buf_char == b'>':
in_tag = False
elif buf_char == b'<':
in_tag = True
# If current character is not extended-ASCII and not alphabetic...
if buf_char < b'\x80' and not buf_char.isalpha():
# ...and we're not in a tag
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
filtered.extend(buf[prev:curr])
# Output a space to delimit stretch we kept
filtered.extend(b' ')
prev = curr + 1
# If we're not in a tag...
if not in_tag:
# Keep everything after last non-extended-ASCII, non-alphabetic
# character
filtered.extend(buf[prev:])
return filtered
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""
from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
from chardet import __version__
from chardet.compat import PY2
from chardet.universaldetector import UniversalDetector
def description_of(lines, name='stdin'):
"""
Return a string describing the probable encoding of a file or
list of strings.
:param lines: The lines to get the encoding of.
:type lines: Iterable of bytes
:param name: Name of file or collection of lines
:type name: str
"""
u = UniversalDetector()
for line in lines:
line = bytearray(line)
u.feed(line)
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
if u.done:
break
u.close()
result = u.result
if PY2:
name = name.decode(sys.getfilesystemencoding(), 'ignore')
if result['encoding']:
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
result['confidence'])
else:
return '{0}: no result'.format(name)
def main(argv=None):
"""
Handles command line arguments and gets things started.
:param argv: List of arguments, as if specified on the command-line.
If None, ``sys.argv[1:]`` is used instead.
:type argv: list of str
"""
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
encodings")
parser.add_argument('input',
help='File whose encoding we would like to determine. \
(default: stdin)',
type=argparse.FileType('rb'), nargs='*',
default=[sys.stdin if PY2 else sys.stdin.buffer])
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
args = parser.parse_args(argv)
for f in args.input:
if f.isatty():
print("You are running chardetect interactively. Press " +
"CTRL-D twice at the start of a blank line to signal the " +
"end of your input. If you want help, run chardetect " +
"--help\n", file=sys.stderr)
print(description_of(f, f.name))
if __name__ == '__main__':
main()
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
from .enums import MachineState
class CodingStateMachine(object):
"""
A state machine to verify a byte sequence for a particular encoding. For
each byte the detector receives, it will feed that byte to every active
state machine available, one byte at a time. The state machine changes its
state based on its previous state and the byte it receives. There are 3
states in a state machine that are of interest to an auto-detector:
START state: This is the state to start with, or a legal byte sequence
(i.e. a valid code point) for character has been identified.
ME state: This indicates that the state machine identified a byte sequence
that is specific to the charset it is designed for and that
there is no other possible encoding which can contain this byte
sequence. This will to lead to an immediate positive answer for
the detector.
ERROR state: This indicates the state machine identified an illegal byte
sequence for that encoding. This will lead to an immediate
negative answer for this encoding. Detector will exclude this
encoding from consideration from here on.
"""
def __init__(self, sm):
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
self._curr_state = None
self.logger = logging.getLogger(__name__)
self.reset()
def reset(self):
self._curr_state = MachineState.START
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model['class_table'][c]
if self._curr_state == MachineState.START:
self._curr_byte_pos = 0
self._curr_char_len = self._model['char_len_table'][byte_class]
# from byte's class and state_table, we get its next state
curr_state = (self._curr_state * self._model['class_factor']
+ byte_class)
self._curr_state = self._model['state_table'][curr_state]
self._curr_byte_pos += 1
return self._curr_state
def get_current_charlen(self):
return self._curr_char_len
def get_coding_state_machine(self):
return self._model['name']
@property
def language(self):
return self._model['language']
######################## BEGIN LICENSE BLOCK ########################
# Contributor(s):
# Dan Blanchard
# Ian Cordasco
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
if sys.version_info < (3, 0):
PY2 = True
PY3 = False
base_str = (str, unicode)
text_type = unicode
else:
PY2 = False
PY3 = True
base_str = (bytes, str)
text_type = str
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .chardistribution import EUCKRDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
super(CP949Prober, self).__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
# not different.
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "CP949"
@property
def language(self):
return "Korean"
"""
All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
class InputState(object):
"""
This enum represents the different states a universal detector can be in.
"""
PURE_ASCII = 0
ESC_ASCII = 1
HIGH_BYTE = 2
class LanguageFilter(object):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
KOREAN = 0x08
NON_CJK = 0x10
ALL = 0x1F
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
CJK = CHINESE | JAPANESE | KOREAN
class ProbingState(object):
"""
This enum represents the different states a prober can be in.
"""
DETECTING = 0
FOUND_IT = 1
NOT_ME = 2
class MachineState(object):
"""
This enum represents the different states a state machine can be in.
"""
START = 0
ERROR = 1
ITS_ME = 2
class SequenceLikelihood(object):
"""
This enum represents the likelihood of a character following the previous one.
"""
NEGATIVE = 0
UNLIKELY = 1
LIKELY = 2
POSITIVE = 3
@classmethod
def get_num_categories(cls):
""":returns: The number of likelihood categories in the enum."""
return 4
class CharacterCategory(object):
"""
This enum represents the different categories language models for
``SingleByteCharsetProber`` put characters into.
Anything less than CONTROL is considered a letter.
"""
UNDEFINED = 255
LINE_BREAK = 254
SYMBOL = 253
DIGIT = 252
CONTROL = 251
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, ProbingState, MachineState
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
ISO2022KR_SM_MODEL)
class EscCharSetProber(CharSetProber):
"""
This CharSetProber uses a "code scheme" approach for detecting encodings,
whereby easily recognizable escape or shift sequences are relied on to
identify these encodings.
"""
def __init__(self, lang_filter=None):
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
if self.lang_filter & LanguageFilter.JAPANESE:
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
self._detected_language = None
self._state = None
self.reset()
def reset(self):
super(EscCharSetProber, self).reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
self._detected_charset = None
self._detected_language = None
@property
def charset_name(self):
return self._detected_charset
@property
def language(self):
return self._detected_language
def get_confidence(self):
if self._detected_charset:
return 0.99
else:
return 0.00
def feed(self, byte_str):
for c in byte_str:
for coding_sm in self.coding_sm:
if not coding_sm or not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
coding_sm.active = False
self.active_sm_count -= 1
if self.active_sm_count <= 0:
self._state = ProbingState.NOT_ME
return self.state
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
self._detected_charset = coding_sm.get_coding_state_machine()
self._detected_language = coding_sm.language
return self.state
return self.state
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import ProbingState, MachineState
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
from .jpcntx import EUCJPContextAnalysis
from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
super(EUCJPProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
super(EUCJPProber, self).reset()
self.context_analyzer.reset()
@property
def charset_name(self):
return "EUC-JP"
@property
def language(self):
return "Japanese"
def feed(self, byte_str):
for i in range(len(byte_str)):
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.ERROR:
self.logger.debug('%s %s prober hit error at byte %s',
self.charset_name, self.language, i)
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
self.context_analyzer.feed(self._last_char, char_len)
self.distribution_analyzer.feed(self._last_char, char_len)
else:
self.context_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
if (self.context_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.FOUND_IT
return self.state
def get_confidence(self):
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
super(EUCKRProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "EUC-KR"
@property
def language(self):
return "Korean"
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCTWDistributionAnalysis
from .mbcssm import EUCTW_SM_MODEL
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
super(EUCTWProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "EUC-TW"
@property
def language(self):
return "Taiwan"
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import GB2312DistributionAnalysis
from .mbcssm import GB2312_SM_MODEL
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
super(GB2312Prober, self).__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "GB2312"
@property
def language(self):
return "Chinese"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .enums import ProbingState
FREQ_CAT_NUM = 4
UDF = 0 # undefined
OTH = 1 # other
ASC = 2 # ascii capital letter
ASS = 3 # ascii small letter
ACV = 4 # accent capital vowel
ACO = 5 # accent capital other
ASV = 6 # accent small vowel
ASO = 7 # accent small other
CLASS_NUM = 8 # total classes
Latin1_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
)
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO
0, 0, 0, 0, 0, 0, 0, 0, # UDF
0, 3, 3, 3, 3, 3, 3, 3, # OTH
0, 3, 3, 3, 3, 3, 3, 3, # ASC
0, 3, 3, 3, 1, 1, 3, 3, # ASS
0, 3, 3, 3, 1, 2, 1, 2, # ACV
0, 3, 3, 3, 3, 3, 3, 3, # ACO
0, 3, 1, 3, 1, 1, 1, 3, # ASV
0, 3, 1, 3, 1, 1, 3, 3, # ASO
)
class Latin1Prober(CharSetProber):
def __init__(self):
super(Latin1Prober, self).__init__()
self._last_char_class = None
self._freq_counter = None
self.reset()
def reset(self):
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
CharSetProber.reset(self)
@property
def charset_name(self):
return "ISO-8859-1"
@property
def language(self):
return ""
def feed(self, byte_str):
byte_str = self.filter_with_english_letters(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
+ char_class]
if freq == 0:
self._state = ProbingState.NOT_ME
break
self._freq_counter[freq] += 1
self._last_char_class = char_class
return self.state
def get_confidence(self):
if self.state == ProbingState.NOT_ME:
return 0.01
total = sum(self._freq_counter)
if total < 0.01:
confidence = 0.0
else:
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
/ total)
if confidence < 0.0:
confidence = 0.0
# lower the confidence of latin1 so that other more accurate
# detector can take priority.
confidence = confidence * 0.73
return confidence
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment