-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharchive-old-files.sh
executable file
·134 lines (107 loc) · 6.22 KB
/
archive-old-files.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
#
# I'm assuming that this script will be run on day 15 of each month.
#
#
# Path configuration.
#
# Log directories of the extracter and the converter (both currently point
# at the same location).
EXTRACTER_LOG_DIR="/home/frederic/covid19/translation/logs"
CONVERTER_LOG_DIR="/home/frederic/covid19/translation/logs"
# Root of the run tree; every other working directory hangs below it.
COVID_RUN_DIR="/mnt/hinoki/share/covid19/run"
# Destination of the archived files.
ARCHIVE_DIR="${COVID_RUN_DIR}/archives"
# Source directories that are swept by the archiving loop.
NEW_HTML_FILES_DIR="${COVID_RUN_DIR}/new-html-files"
NEW_TRANSLATED_FILES_DIR="${COVID_RUN_DIR}/new-translated-files"
NEW_XML_FILES_DIR="${COVID_RUN_DIR}/new-xml-files"
EXTRACTER_DIR="${COVID_RUN_DIR}/extracter"
# Snapshot of the current date: the year and month drive the archive
# window, the full timestamp is only used for the start-up banner.
year=$(date '+%Y')
month=$(date '+%m')
now=$(date '+%Y/%m/%d %H:%M')
echo "Archiving old files. Started on ${now}."
#
# Initialise tmp_year and tmp_month to the first month of the archive
# window, i.e. three months before the current month. The window covers
# the three months that precede the current one.
#
# ${month#0} drops the zero padding so that "08" and "09" are not parsed
# as (invalid) octal numbers in arithmetic context.
#
if (( ${month#0} <= 3 )); then
  # January to March: the window starts in October to December of the
  # previous year.
  tmp_year=$(( year - 1 ))
  tmp_month=$(( ${month#0} + 9 ))
else
  tmp_year=${year}
  tmp_month=$(( ${month#0} - 3 ))
fi
# File and directory names use two-digit months.
printf -v tmp_month '%02d' "${tmp_month}"
#
# Helpers used by the archiving loop below.
#

# move_existing DEST FILE...
#   Move into DEST only those FILE arguments that actually exist.
#   Callers pass unquoted globs; when a glob matches nothing bash hands
#   over the literal pattern, which is skipped here instead of letting mv
#   fail with "cannot stat". This happens routinely because consecutive
#   monthly runs cover overlapping months whose files are already archived.
move_existing() {
  local dest=$1
  shift
  local -a found=()
  local candidate
  for candidate in "$@"; do
    if [[ -e "$candidate" || -L "$candidate" ]]; then
      found+=("$candidate")
    fi
  done
  if (( ${#found[@]} > 0 )); then
    mv -- "${found[@]}" "$dest/"
  fi
}

# archive_files MODE DEST FILE...
#   Create DEST, move the existing FILEs into it, then compress according
#   to MODE:
#     txt   gzip every *.txt file (case-insensitive) found under DEST
#     logs  gzip every file under DEST that is not already a .gz file
#     keep  leave the moved files uncompressed
#   Returns 1 if DEST cannot be created.
archive_files() {
  local mode=$1
  local dest=$2
  shift 2
  mkdir -p -- "$dest" || return 1
  move_existing "$dest" "$@"
  case "$mode" in
    txt)
      find "$dest" -type f -iname '*.txt' -exec gzip {} +
      ;;
    logs)
      # Skip files compressed by an earlier run; gzip would only emit an
      # "already has .gz suffix" warning for each of them.
      find "$dest" -type f ! -name '*.gz' -exec gzip {} +
      ;;
    keep)
      ;;
  esac
}

# archive_month YEAR MONTH
#   Archive everything that belongs to YEAR/MONTH (MONTH zero-padded to two
#   digits): web and twitter html/translated/xml file lists, extracted
#   files, extracter and converter logs, and statistics files.
#   Reads the *_DIR variables defined at the top of the script.
archive_month() {
  local yr=$1
  local mo=$2
  local stamp="$yr-$mo"   # as it appears in file names
  local sub="$yr/$mo"     # as it appears in archive directory paths
  local domain_dir domain

  # Archive new-html-files.
  archive_files txt "$ARCHIVE_DIR/new-html-files/$sub" \
    "$NEW_HTML_FILES_DIR"/new-html-files-"$stamp"*.txt

  # Archive new-translated-files (default and "en" variants).
  archive_files txt "$ARCHIVE_DIR/new-translated-files/$sub" \
    "$NEW_TRANSLATED_FILES_DIR"/new-translated-files-"$stamp"*.txt* \
    "$NEW_TRANSLATED_FILES_DIR"/new-translated-files-en-"$stamp"*.txt*

  # Archive new-xml-files and drop their lock files.
  archive_files txt "$ARCHIVE_DIR/new-xml-files/$sub" \
    "$NEW_XML_FILES_DIR"/new-xml-files-"$stamp"*.txt
  rm -f -- "$NEW_XML_FILES_DIR"/new-xml-files-"$stamp"*.txt.lock

  # Archive extracted files, one sub-directory per domain.
  for domain_dir in "$EXTRACTER_DIR"/*; do
    # Skips plain files and the literal pattern left behind when the
    # directory is empty (which would otherwise create a "*" directory).
    [[ -d "$domain_dir" ]] || continue
    domain=${domain_dir##*/}
    archive_files txt "$ARCHIVE_DIR/extracter/$domain/$sub" \
      "$domain_dir"/extracter_"$stamp"*.txt
  done

  # Archive extracter log files.
  archive_files logs "$ARCHIVE_DIR/logs/extracter/$sub" \
    "$EXTRACTER_LOG_DIR"/extracter.log."$stamp"*

  # Archive converter log files.
  archive_files logs "$ARCHIVE_DIR/logs/converter/$sub" \
    "$CONVERTER_LOG_DIR"/converter.log."$stamp"*

  # Archive stats files (kept uncompressed, inside the stats tree itself).
  archive_files keep "$COVID_RUN_DIR/stats/$sub" \
    "$COVID_RUN_DIR"/stats/stats-"$stamp"-*

  # Archive new-twitter-html-files.
  archive_files txt "$ARCHIVE_DIR/new-twitter-html-files/$sub" \
    "$NEW_HTML_FILES_DIR"/new-twitter-html-files-"$stamp"*.txt

  # Archive new-twitter-translated-files (default and "en" variants).
  archive_files txt "$ARCHIVE_DIR/new-twitter-translated-files/$sub" \
    "$NEW_TRANSLATED_FILES_DIR"/new-twitter-translated-files-"$stamp"*.txt* \
    "$NEW_TRANSLATED_FILES_DIR"/new-twitter-translated-files-en-"$stamp"*.txt*

  # Archive new-twitter-xml-files and drop their lock files.
  archive_files txt "$ARCHIVE_DIR/new-twitter-xml-files/$sub" \
    "$NEW_XML_FILES_DIR"/new-twitter-xml-files-"$stamp"*.txt
  rm -f -- "$NEW_XML_FILES_DIR"/new-twitter-xml-files-"$stamp"*.txt.lock

  # Archive twitter extracted files.
  archive_files txt "$ARCHIVE_DIR/twitter-extracter/$sub" \
    "$COVID_RUN_DIR"/twitter-extracter/extracter_"$stamp"*.txt

  # Archive twitter extracter log files.
  archive_files logs "$ARCHIVE_DIR/logs/twitter-extracter/$sub" \
    "$EXTRACTER_LOG_DIR"/twitter_extracter.log."$stamp"*

  # Archive twitter converter log files.
  archive_files logs "$ARCHIVE_DIR/logs/twitter-converter/$sub" \
    "$CONVERTER_LOG_DIR"/twitter_converter.log."$stamp"*

  # Archive twitter stats files (kept uncompressed).
  archive_files keep "$COVID_RUN_DIR/twitter-stats/$sub" \
    "$COVID_RUN_DIR"/twitter-stats/twitter-stats-"$stamp"-*
}

#
# Walk through three consecutive months, starting at tmp_year/tmp_month,
# and archive each of them.
#
for iter in 1 2 3; do
  echo "Archiving files from $tmp_year/$tmp_month..."
  archive_month "$tmp_year" "$tmp_month"

  # Step to the next month, rolling over into the next year after December.
  # ${tmp_month#0} strips the zero padding so "08"/"09" are not read as octal.
  if [[ ${tmp_month#0} -eq 12 ]]; then
    tmp_month=01
    tmp_year=$(( tmp_year + 1 ))
  else
    printf -v tmp_month '%02d' "$(( ${tmp_month#0} + 1 ))"
  fi
done
echo "Files have been archived."