-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #63 from standage/cli/sum
Added simple summary script to CLI
- Loading branch information
Showing
7 changed files
with
82 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/usr/bin/env python | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (C) 2017 Daniel Standage <[email protected]> | ||
# | ||
# This file is part of tag (http://github.com/standage/tag) and is licensed | ||
# under the BSD 3-clause license: see LICENSE. | ||
# ----------------------------------------------------------------------------- | ||
|
||
from __future__ import print_function | ||
import argparse | ||
from collections import defaultdict | ||
import tag | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``sum`` subcommand on an argparse subparsers object.

    :param subparsers: object exposing ``add_parser`` (e.g. the value
                       returned by ``ArgumentParser.add_subparsers()``)

    The subcommand takes a single positional argument, ``gff3``, naming the
    input file. Nothing is returned.
    """
    sum_parser = subparsers.add_parser(
        'sum',
        description='Briefly summarize a GFF3 file',
    )
    sum_parser.add_argument('gff3', help='input file')
|
||
|
||
def main(args):
    """Print a brief summary of the annotation file named by ``args.gff3``.

    :param args: parsed command-line arguments; only ``args.gff3`` is read

    The report lists the number of annotated sequences and their combined
    length (sum of ``end - start`` over each sequence's extent), the total
    number of feature entries, and, for each feature type in sorted order,
    the entry count and the maximum ``len()`` observed for that type.
    """
    index = tag.index.Index()
    index.consume_file(args.gff3)
    # Set only after the file has been loaded, as in the original flow.
    # NOTE(review): presumably suppresses inferred entries when the index is
    # iterated below -- confirm against the tag.index.Index documentation.
    index.yield_inferred = False

    seqids = list(index.seqids)
    total_length = sum(
        end - start for start, end in map(index.extent, seqids)
    )

    # Tally every entry produced by iterating each selected feature.
    type_counts = {}
    type_maxlens = {}
    for toplevel in tag.select.features(index):
        for entry in toplevel:
            ftype = entry.type
            type_counts[ftype] = type_counts.get(ftype, 0) + 1
            type_maxlens[ftype] = max(len(entry), type_maxlens.get(ftype, 0))

    lines = [
        'Summary for file "{}":'.format(args.gff3),
        ' - {} annotated sequences for a total length of {} bp'.format(
            len(seqids), total_length
        ),
        ' - {} annotated features (or feature entries)'.format(
            sum(type_counts.values())
        ),
    ]
    for ftype in sorted(type_counts):
        lines.append(
            ' - {} entries of type {}, maximum length: {} bp'.format(
                type_counts[ftype], ftype, type_maxlens[ftype]
            )
        )

    # Joining with newlines leaves no leading or trailing whitespace, so the
    # printed text matches the stripped concatenation exactly.
    print('\n'.join(lines))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Summary for file "tests/testdata/GCF_001639295.1_ASM163929v1_genomic.gff.gz": | ||
- 232 annotated sequences for a total length of 2414608 bp | ||
- 4298 annotated features (or feature entries) | ||
- 1973 entries of type CDS, maximum length: 12025 bp | ||
- 1 entries of type RNase_P_RNA, maximum length: 274 bp | ||
- 1 entries of type SRP_RNA, maximum length: 316 bp | ||
- 40 entries of type exon, maximum length: 2857 bp | ||
- 2012 entries of type gene, maximum length: 12025 bp | ||
- 4 entries of type rRNA, maximum length: 2857 bp | ||
- 232 entries of type region, maximum length: 74332 bp | ||
- 2 entries of type repeat_region, maximum length: 3602 bp | ||
- 33 entries of type tRNA, maximum length: 107 bp |